path: root/src/spdk/lib/ftl
author     Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-21 11:54:28 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>   2024-04-21 11:54:28 +0000
commit     e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree       64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/lib/ftl
parent     Initial commit. (diff)
download   ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
           ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/lib/ftl')
-rw-r--r--  src/spdk/lib/ftl/Makefile          47
-rw-r--r--  src/spdk/lib/ftl/ftl_addr.h        76
-rw-r--r--  src/spdk/lib/ftl/ftl_band.c      1097
-rw-r--r--  src/spdk/lib/ftl/ftl_band.h       287
-rw-r--r--  src/spdk/lib/ftl/ftl_core.c      2460
-rw-r--r--  src/spdk/lib/ftl/ftl_core.h       552
-rw-r--r--  src/spdk/lib/ftl/ftl_debug.c      169
-rw-r--r--  src/spdk/lib/ftl/ftl_debug.h       73
-rw-r--r--  src/spdk/lib/ftl/ftl_init.c      1688
-rw-r--r--  src/spdk/lib/ftl/ftl_io.c         563
-rw-r--r--  src/spdk/lib/ftl/ftl_io.h         351
-rw-r--r--  src/spdk/lib/ftl/ftl_reloc.c      860
-rw-r--r--  src/spdk/lib/ftl/ftl_reloc.h       53
-rw-r--r--  src/spdk/lib/ftl/ftl_restore.c   1350
-rw-r--r--  src/spdk/lib/ftl/ftl_trace.c      361
-rw-r--r--  src/spdk/lib/ftl/ftl_trace.h       84
-rw-r--r--  src/spdk/lib/ftl/spdk_ftl.map      14
17 files changed, 10085 insertions, 0 deletions
diff --git a/src/spdk/lib/ftl/Makefile b/src/spdk/lib/ftl/Makefile
new file mode 100644
index 000000000..c24274622
--- /dev/null
+++ b/src/spdk/lib/ftl/Makefile
@@ -0,0 +1,47 @@
+#
+# BSD LICENSE
+#
+# Copyright (c) Intel Corporation.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+SO_VER := 2
+SO_MINOR := 0
+
+C_SRCS = ftl_band.c ftl_core.c ftl_debug.c ftl_io.c ftl_reloc.c \
+ ftl_restore.c ftl_init.c ftl_trace.c
+
+SPDK_MAP_FILE = $(abspath $(CURDIR)/spdk_ftl.map)
+
+LIBNAME = ftl
+
+include $(SPDK_ROOT_DIR)/mk/spdk.lib.mk
diff --git a/src/spdk/lib/ftl/ftl_addr.h b/src/spdk/lib/ftl/ftl_addr.h
new file mode 100644
index 000000000..36d2ffb00
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_addr.h
@@ -0,0 +1,76 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FTL_ADDR_H
+#define FTL_ADDR_H
+
+#include "spdk/stdinc.h"
+
+/* Marks address as invalid */
+#define FTL_ADDR_INVALID (-1)
+/* Marks LBA as invalid */
+#define FTL_LBA_INVALID ((uint64_t)-1)
+/* Smallest data unit size */
+#define FTL_BLOCK_SIZE 4096
+
+/* This structure represents on-disk address. It can have one of the following */
+/* formats: */
+/* - offset inside the disk */
+/* - cache_offset inside the cache (indicated by the cached flag) */
+/* - packed version of the two formats above (can be only used when the */
+/* offset can be represented in less than 32 bits) */
+/* Packed format is used, when possible, to avoid wasting RAM on the L2P table. */
+struct ftl_addr {
+ union {
+ struct {
+ uint64_t cache_offset : 63;
+ uint64_t cached : 1;
+ };
+
+ struct {
+ union {
+ struct {
+ uint32_t cache_offset : 31;
+ uint32_t cached : 1;
+ };
+
+ uint32_t offset;
+ };
+ uint32_t rsvd;
+ } pack;
+
+ uint64_t offset;
+ };
+};
+
+#endif /* FTL_ADDR_H */
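The header above only defines the address layout; the conversion between the full 64-bit form and the packed 32-bit form used by the L2P table is implemented elsewhere in the library. Below is a minimal standalone sketch of what that round trip could look like, using hypothetical helper names (example_addr_to_packed / example_addr_from_packed) that are not part of these sources, and ignoring the FTL_ADDR_INVALID special case a full implementation would also need to handle:

#include "ftl_addr.h"

/* Shrink a full address into the 32-bit 'pack' representation; only valid
 * when the offset fits in 32 bits (31 bits for cache offsets). */
static inline struct ftl_addr
example_addr_to_packed(struct ftl_addr addr)
{
	struct ftl_addr p = { .offset = 0 };

	if (addr.cached) {
		p.pack.cached = 1;
		p.pack.cache_offset = (uint32_t)addr.cache_offset;
	} else {
		p.pack.offset = (uint32_t)addr.offset;
	}

	return p;
}

/* Expand a packed entry back into the full 64-bit representation. */
static inline struct ftl_addr
example_addr_from_packed(struct ftl_addr p)
{
	struct ftl_addr addr = { .offset = 0 };

	if (p.pack.cached) {
		addr.cached = 1;
		addr.cache_offset = p.pack.cache_offset;
	} else {
		addr.offset = p.pack.offset;
	}

	return addr;
}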
diff --git a/src/spdk/lib/ftl/ftl_band.c b/src/spdk/lib/ftl/ftl_band.c
new file mode 100644
index 000000000..62221dcf6
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_band.c
@@ -0,0 +1,1097 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/crc32.h"
+#include "spdk/likely.h"
+#include "spdk/util.h"
+#include "spdk/ftl.h"
+
+#include "ftl_band.h"
+#include "ftl_io.h"
+#include "ftl_core.h"
+#include "ftl_reloc.h"
+#include "ftl_debug.h"
+
+/* TODO: define some signature for meta version */
+#define FTL_MD_VER 1
+
+struct __attribute__((packed)) ftl_md_hdr {
+ /* Device instance */
+ struct spdk_uuid uuid;
+
+ /* Meta version */
+ uint8_t ver;
+
+ /* Sequence number */
+ uint64_t seq;
+
+ /* CRC32 checksum */
+ uint32_t checksum;
+};
+
+/* End metadata layout stored on media (with all three being aligned to block size): */
+/* - header */
+/* - valid bitmap */
+/* - LBA map */
+struct __attribute__((packed)) ftl_tail_md {
+ struct ftl_md_hdr hdr;
+
+ /* Max number of blocks */
+ uint64_t num_blocks;
+
+ uint8_t reserved[4059];
+};
+SPDK_STATIC_ASSERT(sizeof(struct ftl_tail_md) == FTL_BLOCK_SIZE, "Incorrect metadata size");
+
+struct __attribute__((packed)) ftl_head_md {
+ struct ftl_md_hdr hdr;
+
+ /* Number of defrag cycles */
+ uint64_t wr_cnt;
+
+ /* Number of surfaced LBAs */
+ uint64_t lba_cnt;
+
+ /* Transfer size */
+ uint32_t xfer_size;
+};
+
+size_t
+ftl_tail_md_hdr_num_blocks(void)
+{
+ return spdk_divide_round_up(sizeof(struct ftl_tail_md), FTL_BLOCK_SIZE);
+}
+
+size_t
+ftl_vld_map_num_blocks(const struct spdk_ftl_dev *dev)
+{
+ return spdk_divide_round_up(ftl_vld_map_size(dev), FTL_BLOCK_SIZE);
+}
+
+size_t
+ftl_lba_map_num_blocks(const struct spdk_ftl_dev *dev)
+{
+ return spdk_divide_round_up(ftl_get_num_blocks_in_band(dev) * sizeof(uint64_t), FTL_BLOCK_SIZE);
+}
+
+size_t
+ftl_head_md_num_blocks(const struct spdk_ftl_dev *dev)
+{
+ return dev->xfer_size;
+}
+
+size_t
+ftl_tail_md_num_blocks(const struct spdk_ftl_dev *dev)
+{
+ return spdk_divide_round_up(ftl_tail_md_hdr_num_blocks() +
+ ftl_vld_map_num_blocks(dev) +
+ ftl_lba_map_num_blocks(dev),
+ dev->xfer_size) * dev->xfer_size;
+}
+
+static uint64_t
+ftl_band_tail_md_offset(const struct ftl_band *band)
+{
+ return ftl_band_num_usable_blocks(band) -
+ ftl_tail_md_num_blocks(band->dev);
+}
+
+int
+ftl_band_full(struct ftl_band *band, size_t offset)
+{
+ return offset == ftl_band_tail_md_offset(band);
+}
+
+void
+ftl_band_write_failed(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+
+ band->high_prio = 1;
+
+ ftl_reloc_add(dev->reloc, band, 0, ftl_get_num_blocks_in_band(dev), 1, true);
+ ftl_band_set_state(band, FTL_BAND_STATE_CLOSED);
+}
+
+static void
+ftl_band_free_lba_map(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_lba_map *lba_map = &band->lba_map;
+
+ assert(band->state == FTL_BAND_STATE_CLOSED ||
+ band->state == FTL_BAND_STATE_FREE);
+ assert(lba_map->ref_cnt == 0);
+ assert(lba_map->map != NULL);
+ assert(!band->high_prio);
+
+ /* Verify that band's metadata is consistent with l2p */
+ if (band->num_zones) {
+ assert(ftl_band_validate_md(band) == true);
+ }
+
+ spdk_mempool_put(dev->lba_pool, lba_map->dma_buf);
+ lba_map->map = NULL;
+ lba_map->dma_buf = NULL;
+}
+
+static void
+_ftl_band_set_free(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_band *lband, *prev;
+
+ /* Remove the band from the closed band list */
+ LIST_REMOVE(band, list_entry);
+
+ /* Keep the list sorted by band's write count */
+ LIST_FOREACH(lband, &dev->free_bands, list_entry) {
+ if (lband->wr_cnt > band->wr_cnt) {
+ LIST_INSERT_BEFORE(lband, band, list_entry);
+ break;
+ }
+ prev = lband;
+ }
+
+ if (!lband) {
+ if (LIST_EMPTY(&dev->free_bands)) {
+ LIST_INSERT_HEAD(&dev->free_bands, band, list_entry);
+ } else {
+ LIST_INSERT_AFTER(prev, band, list_entry);
+ }
+ }
+
+#if defined(DEBUG)
+	prev = NULL;
+	LIST_FOREACH(lband, &dev->free_bands, list_entry) {
+		if (prev) {
+			assert(prev->wr_cnt <= lband->wr_cnt);
+		}
+		prev = lband;
+	}
+#endif
+ dev->num_free++;
+ ftl_apply_limits(dev);
+}
+
+static void
+_ftl_band_set_preparing(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+
+ /* Remove band from free list */
+ LIST_REMOVE(band, list_entry);
+
+ band->wr_cnt++;
+
+ assert(dev->num_free > 0);
+ dev->num_free--;
+
+ ftl_apply_limits(dev);
+}
+
+static void
+_ftl_band_set_closed(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+
+ /* Set the state as free_md() checks for that */
+ band->state = FTL_BAND_STATE_CLOSED;
+
+ /* Free the lba map if there are no outstanding IOs */
+ ftl_band_release_lba_map(band);
+
+ if (spdk_likely(band->num_zones)) {
+ LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
+ } else {
+ LIST_REMOVE(band, list_entry);
+ }
+}
+
+static uint32_t
+ftl_md_calc_crc(const struct ftl_md_hdr *hdr, size_t size)
+{
+ size_t checkoff = offsetof(struct ftl_md_hdr, checksum);
+ size_t mdoff = checkoff + sizeof(hdr->checksum);
+ uint32_t crc;
+
+ crc = spdk_crc32c_update(hdr, checkoff, 0);
+ return spdk_crc32c_update((const char *)hdr + mdoff, size - mdoff, crc);
+}
+
+static void
+ftl_set_md_hdr(struct ftl_band *band, struct ftl_md_hdr *hdr, size_t size)
+{
+ hdr->seq = band->seq;
+ hdr->ver = FTL_MD_VER;
+ hdr->uuid = band->dev->uuid;
+ hdr->checksum = ftl_md_calc_crc(hdr, size);
+}
+
+static int
+ftl_pack_head_md(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_head_md *head = band->lba_map.dma_buf;
+
+ head->wr_cnt = band->wr_cnt;
+ head->lba_cnt = dev->num_lbas;
+ head->xfer_size = dev->xfer_size;
+ ftl_set_md_hdr(band, &head->hdr, sizeof(struct ftl_head_md));
+
+ return FTL_MD_SUCCESS;
+}
+
+static int
+ftl_pack_tail_md(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_lba_map *lba_map = &band->lba_map;
+ struct ftl_tail_md *tail = lba_map->dma_buf;
+ void *vld_offset;
+
+ vld_offset = (char *)tail + ftl_tail_md_hdr_num_blocks() * FTL_BLOCK_SIZE;
+
+ /* Clear out the buffer */
+ memset(tail, 0, ftl_tail_md_hdr_num_blocks() * FTL_BLOCK_SIZE);
+ tail->num_blocks = ftl_get_num_blocks_in_band(dev);
+
+ pthread_spin_lock(&lba_map->lock);
+ spdk_bit_array_store_mask(lba_map->vld, vld_offset);
+ pthread_spin_unlock(&lba_map->lock);
+
+ ftl_set_md_hdr(band, &tail->hdr, ftl_tail_md_num_blocks(dev) * FTL_BLOCK_SIZE);
+
+ return FTL_MD_SUCCESS;
+}
+
+static int
+ftl_md_hdr_vld(struct spdk_ftl_dev *dev, const struct ftl_md_hdr *hdr, size_t size)
+{
+ if (spdk_uuid_compare(&dev->uuid, &hdr->uuid) != 0) {
+ return FTL_MD_NO_MD;
+ }
+
+ if (hdr->ver != FTL_MD_VER) {
+ return FTL_MD_INVALID_VER;
+ }
+
+ if (ftl_md_calc_crc(hdr, size) != hdr->checksum) {
+ return FTL_MD_INVALID_CRC;
+ }
+
+ return FTL_MD_SUCCESS;
+}
+
+static int
+ftl_unpack_tail_md(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ void *vld_offset;
+ struct ftl_lba_map *lba_map = &band->lba_map;
+ struct ftl_tail_md *tail = lba_map->dma_buf;
+ int rc;
+
+ vld_offset = (char *)tail + ftl_tail_md_hdr_num_blocks() * FTL_BLOCK_SIZE;
+
+ rc = ftl_md_hdr_vld(dev, &tail->hdr, ftl_tail_md_num_blocks(dev) * FTL_BLOCK_SIZE);
+ if (rc) {
+ return rc;
+ }
+
+ /*
+ * When restoring from a dirty shutdown it's possible old tail meta wasn't yet cleared -
+ * band had saved head meta, but didn't manage to send erase to all zones.
+ * The already found tail md header is valid, but inconsistent with the head meta. Treat
+ * such a band as open/without valid tail md.
+ */
+ if (band->seq != tail->hdr.seq) {
+ return FTL_MD_NO_MD;
+ }
+
+ if (tail->num_blocks != ftl_get_num_blocks_in_band(dev)) {
+ return FTL_MD_INVALID_SIZE;
+ }
+
+ spdk_bit_array_load_mask(lba_map->vld, vld_offset);
+
+ return FTL_MD_SUCCESS;
+}
+
+static int
+ftl_unpack_head_md(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_head_md *head = band->lba_map.dma_buf;
+ int rc;
+
+ rc = ftl_md_hdr_vld(dev, &head->hdr, sizeof(struct ftl_head_md));
+ if (rc) {
+ return rc;
+ }
+
+ band->seq = head->hdr.seq;
+ band->wr_cnt = head->wr_cnt;
+
+ if (dev->global_md.num_lbas == 0) {
+ dev->global_md.num_lbas = head->lba_cnt;
+ }
+
+ if (dev->global_md.num_lbas != head->lba_cnt) {
+ return FTL_MD_INVALID_SIZE;
+ }
+
+ if (dev->xfer_size != head->xfer_size) {
+ return FTL_MD_INVALID_SIZE;
+ }
+
+ return FTL_MD_SUCCESS;
+}
+
+struct ftl_addr
+ftl_band_tail_md_addr(struct ftl_band *band)
+{
+ struct ftl_addr addr = {};
+ struct ftl_zone *zone;
+ struct spdk_ftl_dev *dev = band->dev;
+ size_t xfer_size = dev->xfer_size;
+ size_t num_req = ftl_band_tail_md_offset(band) / xfer_size;
+ size_t i;
+
+ if (spdk_unlikely(!band->num_zones)) {
+ return ftl_to_addr(FTL_ADDR_INVALID);
+ }
+
+ /* Metadata should be aligned to xfer size */
+ assert(ftl_band_tail_md_offset(band) % xfer_size == 0);
+
+ zone = CIRCLEQ_FIRST(&band->zones);
+ for (i = 0; i < num_req % band->num_zones; ++i) {
+ zone = ftl_band_next_zone(band, zone);
+ }
+
+ addr.offset = (num_req / band->num_zones) * xfer_size;
+ addr.offset += zone->info.zone_id;
+
+ return addr;
+}
+
+struct ftl_addr
+ftl_band_head_md_addr(struct ftl_band *band)
+{
+ if (spdk_unlikely(!band->num_zones)) {
+ return ftl_to_addr(FTL_ADDR_INVALID);
+ }
+
+ return ftl_to_addr(CIRCLEQ_FIRST(&band->zones)->info.zone_id);
+}
+
+void
+ftl_band_set_state(struct ftl_band *band, enum ftl_band_state state)
+{
+ switch (state) {
+ case FTL_BAND_STATE_FREE:
+ assert(band->state == FTL_BAND_STATE_CLOSED);
+ _ftl_band_set_free(band);
+ break;
+
+ case FTL_BAND_STATE_PREP:
+ assert(band->state == FTL_BAND_STATE_FREE);
+ _ftl_band_set_preparing(band);
+ break;
+
+ case FTL_BAND_STATE_CLOSED:
+ if (band->state != FTL_BAND_STATE_CLOSED) {
+ assert(band->state == FTL_BAND_STATE_CLOSING || band->high_prio);
+ _ftl_band_set_closed(band);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ band->state = state;
+}
+
+void
+ftl_band_set_addr(struct ftl_band *band, uint64_t lba, struct ftl_addr addr)
+{
+ struct ftl_lba_map *lba_map = &band->lba_map;
+ uint64_t offset;
+
+ assert(lba != FTL_LBA_INVALID);
+
+ offset = ftl_band_block_offset_from_addr(band, addr);
+ pthread_spin_lock(&lba_map->lock);
+
+ lba_map->num_vld++;
+ lba_map->map[offset] = lba;
+ spdk_bit_array_set(lba_map->vld, offset);
+
+ pthread_spin_unlock(&lba_map->lock);
+}
+
+size_t
+ftl_band_age(const struct ftl_band *band)
+{
+ return (size_t)(band->dev->seq - band->seq);
+}
+
+size_t
+ftl_band_num_usable_blocks(const struct ftl_band *band)
+{
+ return band->num_zones * ftl_get_num_blocks_in_zone(band->dev);
+}
+
+size_t
+ftl_band_user_blocks_left(const struct ftl_band *band, size_t offset)
+{
+ size_t tail_md_offset = ftl_band_tail_md_offset(band);
+
+ if (spdk_unlikely(offset <= ftl_head_md_num_blocks(band->dev))) {
+ return ftl_band_user_blocks(band);
+ }
+
+ if (spdk_unlikely(offset > tail_md_offset)) {
+ return 0;
+ }
+
+ return tail_md_offset - offset;
+}
+
+size_t
+ftl_band_user_blocks(const struct ftl_band *band)
+{
+ return ftl_band_num_usable_blocks(band) -
+ ftl_head_md_num_blocks(band->dev) -
+ ftl_tail_md_num_blocks(band->dev);
+}
+
+struct ftl_band *
+ftl_band_from_addr(struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+ size_t band_id = ftl_addr_get_band(dev, addr);
+
+ assert(band_id < ftl_get_num_bands(dev));
+ return &dev->bands[band_id];
+}
+
+struct ftl_zone *
+ftl_band_zone_from_addr(struct ftl_band *band, struct ftl_addr addr)
+{
+ size_t pu_id = ftl_addr_get_punit(band->dev, addr);
+
+ assert(pu_id < ftl_get_num_punits(band->dev));
+ return &band->zone_buf[pu_id];
+}
+
+uint64_t
+ftl_band_block_offset_from_addr(struct ftl_band *band, struct ftl_addr addr)
+{
+ assert(ftl_addr_get_band(band->dev, addr) == band->id);
+ assert(ftl_addr_get_punit(band->dev, addr) < ftl_get_num_punits(band->dev));
+ return addr.offset % ftl_get_num_blocks_in_band(band->dev);
+}
+
+struct ftl_addr
+ftl_band_next_xfer_addr(struct ftl_band *band, struct ftl_addr addr, size_t num_blocks)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_zone *zone;
+ size_t num_xfers, num_stripes;
+ uint64_t offset;
+
+ assert(ftl_addr_get_band(dev, addr) == band->id);
+
+ offset = ftl_addr_get_zone_offset(dev, addr);
+ zone = ftl_band_zone_from_addr(band, addr);
+
+ num_blocks += (offset % dev->xfer_size);
+ offset -= (offset % dev->xfer_size);
+
+#if defined(DEBUG)
+ /* Check that the number of zones has not been changed */
+ struct ftl_zone *_zone;
+ size_t _num_zones = 0;
+ CIRCLEQ_FOREACH(_zone, &band->zones, circleq) {
+ if (spdk_likely(_zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE)) {
+ _num_zones++;
+ }
+ }
+ assert(band->num_zones == _num_zones);
+#endif
+ assert(band->num_zones != 0);
+ num_stripes = (num_blocks / dev->xfer_size) / band->num_zones;
+ offset += num_stripes * dev->xfer_size;
+ num_blocks -= num_stripes * dev->xfer_size * band->num_zones;
+
+ if (offset > ftl_get_num_blocks_in_zone(dev)) {
+ return ftl_to_addr(FTL_ADDR_INVALID);
+ }
+
+ num_xfers = num_blocks / dev->xfer_size;
+ for (size_t i = 0; i < num_xfers; ++i) {
+ /* When the last zone is reached the block part of the address */
+ /* needs to be increased by xfer_size */
+ if (ftl_band_zone_is_last(band, zone)) {
+ offset += dev->xfer_size;
+ if (offset > ftl_get_num_blocks_in_zone(dev)) {
+ return ftl_to_addr(FTL_ADDR_INVALID);
+ }
+ }
+
+ zone = ftl_band_next_operational_zone(band, zone);
+ assert(zone);
+
+ num_blocks -= dev->xfer_size;
+ }
+
+ if (num_blocks) {
+ offset += num_blocks;
+ if (offset > ftl_get_num_blocks_in_zone(dev)) {
+ return ftl_to_addr(FTL_ADDR_INVALID);
+ }
+ }
+
+ addr.offset = zone->info.zone_id + offset;
+ return addr;
+}
+
+static size_t
+ftl_xfer_offset_from_addr(struct ftl_band *band, struct ftl_addr addr)
+{
+ struct ftl_zone *zone, *current_zone;
+ unsigned int punit_offset = 0;
+ size_t num_stripes, xfer_size = band->dev->xfer_size;
+ uint64_t offset;
+
+ assert(ftl_addr_get_band(band->dev, addr) == band->id);
+
+ offset = ftl_addr_get_zone_offset(band->dev, addr);
+ num_stripes = (offset / xfer_size) * band->num_zones;
+
+ current_zone = ftl_band_zone_from_addr(band, addr);
+ CIRCLEQ_FOREACH(zone, &band->zones, circleq) {
+ if (current_zone == zone) {
+ break;
+ }
+ punit_offset++;
+ }
+
+ return xfer_size * (num_stripes + punit_offset) + offset % xfer_size;
+}
+
+struct ftl_addr
+ftl_band_addr_from_block_offset(struct ftl_band *band, uint64_t block_off)
+{
+ struct ftl_addr addr = { .offset = 0 };
+
+ addr.offset = block_off + band->id * ftl_get_num_blocks_in_band(band->dev);
+ return addr;
+}
+
+struct ftl_addr
+ftl_band_next_addr(struct ftl_band *band, struct ftl_addr addr, size_t offset)
+{
+ uint64_t block_off = ftl_band_block_offset_from_addr(band, addr);
+ return ftl_band_addr_from_block_offset(band, block_off + offset);
+}
+
+void
+ftl_band_acquire_lba_map(struct ftl_band *band)
+{
+ assert(band->lba_map.map != NULL);
+ band->lba_map.ref_cnt++;
+}
+
+int
+ftl_band_alloc_lba_map(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_lba_map *lba_map = &band->lba_map;
+
+ assert(lba_map->ref_cnt == 0);
+ assert(lba_map->map == NULL);
+
+ lba_map->dma_buf = spdk_mempool_get(dev->lba_pool);
+
+ if (!lba_map->dma_buf) {
+ return -1;
+ }
+
+ memset(lba_map->dma_buf, 0, ftl_lba_map_pool_elem_size(band->dev));
+
+ lba_map->map = (uint64_t *)((char *)lba_map->dma_buf + FTL_BLOCK_SIZE *
+ (ftl_tail_md_hdr_num_blocks() + ftl_vld_map_num_blocks(dev)));
+
+ lba_map->segments = (char *)lba_map->dma_buf + ftl_tail_md_num_blocks(dev) * FTL_BLOCK_SIZE;
+
+ ftl_band_acquire_lba_map(band);
+ return 0;
+}
+
+void
+ftl_band_release_lba_map(struct ftl_band *band)
+{
+ struct ftl_lba_map *lba_map = &band->lba_map;
+
+ assert(lba_map->map != NULL);
+ assert(lba_map->ref_cnt > 0);
+ lba_map->ref_cnt--;
+
+ if (lba_map->ref_cnt == 0) {
+ ftl_band_free_lba_map(band);
+ }
+}
+
+static void
+ftl_read_md_cb(struct ftl_io *io, void *arg, int status)
+{
+ struct ftl_md_io *md_io = (struct ftl_md_io *)io;
+
+ if (!status) {
+ status = md_io->pack_fn(md_io->io.band);
+ } else {
+ status = FTL_MD_IO_FAILURE;
+ }
+
+ md_io->cb_fn(io, md_io->cb_ctx, status);
+}
+
+static struct ftl_md_io *
+ftl_io_init_md_read(struct spdk_ftl_dev *dev, struct ftl_addr addr,
+ struct ftl_band *band, size_t num_blocks, void *buf,
+ ftl_io_fn fn, ftl_md_pack_fn pack_fn, ftl_io_fn cb_fn, void *cb_ctx)
+{
+ struct ftl_md_io *io;
+ struct ftl_io_init_opts opts = {
+ .dev = dev,
+ .io = NULL,
+ .band = band,
+ .size = sizeof(*io),
+ .flags = FTL_IO_MD | FTL_IO_PHYSICAL_MODE,
+ .type = FTL_IO_READ,
+ .num_blocks = num_blocks,
+ .cb_fn = fn,
+ .iovs = {
+ {
+ .iov_base = buf,
+ .iov_len = num_blocks * FTL_BLOCK_SIZE,
+ }
+ },
+ .iovcnt = 1,
+ };
+
+ io = (struct ftl_md_io *)ftl_io_init_internal(&opts);
+ if (!io) {
+ return NULL;
+ }
+
+ io->io.addr = addr;
+ io->pack_fn = pack_fn;
+ io->cb_fn = cb_fn;
+ io->cb_ctx = cb_ctx;
+
+ return io;
+}
+
+static struct ftl_io *
+ftl_io_init_md_write(struct spdk_ftl_dev *dev, struct ftl_band *band,
+ void *data, size_t num_blocks, ftl_io_fn cb)
+{
+ struct ftl_io_init_opts opts = {
+ .dev = dev,
+ .io = NULL,
+ .band = band,
+ .size = sizeof(struct ftl_io),
+ .flags = FTL_IO_MD | FTL_IO_PHYSICAL_MODE,
+ .type = FTL_IO_WRITE,
+ .num_blocks = num_blocks,
+ .cb_fn = cb,
+ .iovs = {
+ {
+ .iov_base = data,
+ .iov_len = num_blocks * FTL_BLOCK_SIZE,
+ }
+ },
+ .iovcnt = 1,
+ .md = NULL,
+ };
+
+ return ftl_io_init_internal(&opts);
+}
+
+static int
+ftl_band_write_md(struct ftl_band *band, size_t num_blocks,
+ ftl_md_pack_fn md_fn, ftl_io_fn cb)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_io *io;
+
+ io = ftl_io_init_md_write(dev, band, band->lba_map.dma_buf, num_blocks, cb);
+ if (!io) {
+ return -ENOMEM;
+ }
+
+ md_fn(band);
+
+ ftl_io_write(io);
+ return 0;
+}
+
+void
+ftl_band_md_clear(struct ftl_band *band)
+{
+ band->seq = 0;
+ band->wr_cnt = 0;
+ band->lba_map.num_vld = 0;
+ band->lba_map.map = NULL;
+}
+
+int
+ftl_band_write_head_md(struct ftl_band *band, ftl_io_fn cb)
+{
+ return ftl_band_write_md(band, ftl_head_md_num_blocks(band->dev),
+ ftl_pack_head_md, cb);
+}
+
+int
+ftl_band_write_tail_md(struct ftl_band *band, ftl_io_fn cb)
+{
+ return ftl_band_write_md(band, ftl_tail_md_num_blocks(band->dev),
+ ftl_pack_tail_md, cb);
+}
+
+static struct ftl_addr
+ftl_band_lba_map_addr(struct ftl_band *band, size_t offset)
+{
+ return ftl_band_next_xfer_addr(band, band->tail_md_addr,
+ ftl_tail_md_hdr_num_blocks() +
+ ftl_vld_map_num_blocks(band->dev) +
+ offset);
+}
+
+static int
+ftl_band_read_md(struct ftl_band *band, size_t num_blocks, struct ftl_addr start_addr,
+ void *buf, ftl_io_fn fn, ftl_md_pack_fn pack_fn, ftl_io_fn cb_fn, void *cb_ctx)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_md_io *io;
+
+ if (spdk_unlikely(!band->num_zones)) {
+ return -ENOENT;
+ }
+
+ io = ftl_io_init_md_read(dev, start_addr, band, num_blocks, buf, fn, pack_fn, cb_fn, cb_ctx);
+ if (!io) {
+ return -ENOMEM;
+ }
+
+ ftl_io_read((struct ftl_io *)io);
+ return 0;
+}
+
+int
+ftl_band_read_tail_md(struct ftl_band *band, struct ftl_addr addr, ftl_io_fn cb_fn, void *cb_ctx)
+{
+ return ftl_band_read_md(band, ftl_tail_md_num_blocks(band->dev), addr, band->lba_map.dma_buf,
+ ftl_read_md_cb, ftl_unpack_tail_md, cb_fn, cb_ctx);
+}
+
+static size_t
+ftl_lba_map_request_segment_done(struct ftl_lba_map_request *request, size_t offset,
+ size_t num_segments)
+{
+ size_t i, num_done = 0;
+
+ for (i = offset; i < offset + num_segments; ++i) {
+ if (spdk_bit_array_get(request->segments, i)) {
+ spdk_bit_array_clear(request->segments, offset);
+ num_done++;
+ }
+ }
+
+ assert(request->num_pending >= num_done);
+ request->num_pending -= num_done;
+
+ return num_done;
+}
+
+static void
+ftl_lba_map_set_segment_state(struct ftl_lba_map *lba_map, size_t offset, size_t num_segments,
+ enum ftl_lba_map_seg_state state)
+{
+ size_t i;
+
+ for (i = offset; i < offset + num_segments; ++i) {
+ lba_map->segments[i] = state;
+ }
+}
+
+static void
+ftl_lba_map_request_free(struct spdk_ftl_dev *dev, struct ftl_lba_map_request *request)
+{
+ spdk_bit_array_clear_mask(request->segments);
+ spdk_mempool_put(dev->lba_request_pool, request);
+}
+
+static void
+ftl_process_lba_map_requests(struct spdk_ftl_dev *dev, struct ftl_lba_map *lba_map, size_t offset,
+ size_t num_segments, int status)
+{
+ struct ftl_lba_map_request *request, *trequest;
+ size_t num_done;
+
+ LIST_FOREACH_SAFE(request, &lba_map->request_list, list_entry, trequest) {
+ num_done = ftl_lba_map_request_segment_done(request, offset, num_segments);
+ if (request->num_pending == 0 || (status && num_done)) {
+ request->cb(NULL, request->cb_ctx, status);
+ LIST_REMOVE(request, list_entry);
+ ftl_lba_map_request_free(dev, request);
+ }
+ }
+}
+
+static size_t
+ftl_lba_map_offset_from_addr(struct ftl_band *band, struct ftl_addr addr)
+{
+ size_t offset;
+ struct ftl_addr start_addr = ftl_band_lba_map_addr(band, 0);
+
+ offset = ftl_xfer_offset_from_addr(band, addr) - ftl_xfer_offset_from_addr(band, start_addr);
+ assert(offset < ftl_lba_map_num_blocks(band->dev));
+
+ return offset;
+}
+
+static void
+ftl_read_lba_map_cb(struct ftl_io *io, void *arg, int status)
+{
+ struct ftl_lba_map *lba_map = &io->band->lba_map;
+ uint64_t block_off;
+
+ block_off = ftl_lba_map_offset_from_addr(io->band, io->addr);
+ assert(block_off + io->num_blocks <= ftl_lba_map_num_blocks(io->dev));
+
+ if (!status) {
+ ftl_lba_map_set_segment_state(lba_map, block_off, io->num_blocks,
+ FTL_LBA_MAP_SEG_CACHED);
+ }
+
+ ftl_process_lba_map_requests(io->dev, lba_map, block_off, io->num_blocks, status);
+}
+
+static struct ftl_lba_map_request *
+ftl_lba_map_alloc_request(struct ftl_band *band, size_t offset, size_t num_segments,
+ ftl_io_fn cb, void *cb_ctx)
+{
+ struct ftl_lba_map_request *request;
+ struct spdk_ftl_dev *dev = band->dev;
+ size_t i;
+
+ request = spdk_mempool_get(dev->lba_request_pool);
+ if (!request) {
+ return NULL;
+ }
+
+ request->cb = cb;
+ request->cb_ctx = cb_ctx;
+ request->num_pending = num_segments;
+
+ for (i = offset; i < offset + num_segments; ++i) {
+ spdk_bit_array_set(request->segments, i);
+ }
+
+ return request;
+}
+
+static size_t
+ftl_lba_map_num_clear_segments(struct ftl_lba_map *lba_map,
+ size_t offset, size_t num_segments)
+{
+ size_t i, cnt = 0;
+
+ for (i = offset; i < offset + num_segments; ++i) {
+ if (lba_map->segments[i] != FTL_LBA_MAP_SEG_CLEAR) {
+ break;
+ }
+ cnt++;
+ }
+
+ return cnt;
+}
+
+int
+ftl_band_read_lba_map(struct ftl_band *band, size_t offset, size_t lba_cnt,
+ ftl_io_fn cb_fn, void *cb_ctx)
+{
+ size_t num_blocks, block_off, num_read, num_segments;
+ struct ftl_lba_map *lba_map = &band->lba_map;
+ struct ftl_lba_map_request *request;
+ int rc = 0;
+
+ block_off = offset / FTL_NUM_LBA_IN_BLOCK;
+ num_segments = spdk_divide_round_up(offset + lba_cnt, FTL_NUM_LBA_IN_BLOCK);
+ num_blocks = num_segments - block_off;
+ assert(block_off + num_blocks <= ftl_lba_map_num_blocks(band->dev));
+
+ request = ftl_lba_map_alloc_request(band, block_off, num_blocks, cb_fn, cb_ctx);
+ if (!request) {
+ return -ENOMEM;
+ }
+
+ while (num_blocks) {
+ if (lba_map->segments[block_off] != FTL_LBA_MAP_SEG_CLEAR) {
+ if (lba_map->segments[block_off] == FTL_LBA_MAP_SEG_CACHED) {
+ ftl_lba_map_request_segment_done(request, block_off, 1);
+ }
+ num_blocks--;
+ block_off++;
+ continue;
+ }
+
+ num_read = ftl_lba_map_num_clear_segments(lba_map, block_off, num_blocks);
+ ftl_lba_map_set_segment_state(lba_map, block_off, num_read,
+ FTL_LBA_MAP_SEG_PENDING);
+
+ rc = ftl_band_read_md(band, num_read, ftl_band_lba_map_addr(band, block_off),
+ (char *)band->lba_map.map + block_off * FTL_BLOCK_SIZE,
+ ftl_read_lba_map_cb, NULL, cb_fn, cb_ctx);
+ if (rc) {
+ ftl_lba_map_request_free(band->dev, request);
+ return rc;
+ }
+
+ assert(num_blocks >= num_read);
+ num_blocks -= num_read;
+ block_off += num_read;
+ }
+
+ if (request->num_pending) {
+ LIST_INSERT_HEAD(&lba_map->request_list, request, list_entry);
+ } else {
+ cb_fn(NULL, cb_ctx, 0);
+ ftl_lba_map_request_free(band->dev, request);
+ }
+
+ return rc;
+}
+
+int
+ftl_band_read_head_md(struct ftl_band *band, ftl_io_fn cb_fn, void *cb_ctx)
+{
+ return ftl_band_read_md(band,
+ ftl_head_md_num_blocks(band->dev),
+ ftl_band_head_md_addr(band),
+ band->lba_map.dma_buf,
+ ftl_read_md_cb,
+ ftl_unpack_head_md,
+ cb_fn,
+ cb_ctx);
+}
+
+void
+ftl_band_remove_zone(struct ftl_band *band, struct ftl_zone *zone)
+{
+ CIRCLEQ_REMOVE(&band->zones, zone, circleq);
+ band->num_zones--;
+}
+
+int
+ftl_band_write_prep(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+
+ if (ftl_band_alloc_lba_map(band)) {
+ return -1;
+ }
+
+ band->seq = ++dev->seq;
+ return 0;
+}
+
+struct ftl_zone *
+ftl_band_next_operational_zone(struct ftl_band *band, struct ftl_zone *zone)
+{
+ struct ftl_zone *result = NULL;
+ struct ftl_zone *entry;
+
+ if (spdk_unlikely(!band->num_zones)) {
+ return NULL;
+ }
+
+ /* Erasing band may fail after it was assigned to wptr. */
+ /* In such a case zone is no longer in band->zones queue. */
+ if (spdk_likely(zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE)) {
+ result = ftl_band_next_zone(band, zone);
+ } else {
+ CIRCLEQ_FOREACH_REVERSE(entry, &band->zones, circleq) {
+ if (entry->info.zone_id > zone->info.zone_id) {
+ result = entry;
+ } else {
+ if (!result) {
+ result = CIRCLEQ_FIRST(&band->zones);
+ }
+ break;
+ }
+ }
+ }
+
+ return result;
+}
+
+void
+ftl_band_clear_lba_map(struct ftl_band *band)
+{
+ struct ftl_lba_map *lba_map = &band->lba_map;
+ size_t num_segments;
+
+ spdk_bit_array_clear_mask(lba_map->vld);
+ memset(lba_map->map, 0, ftl_lba_map_num_blocks(band->dev) * FTL_BLOCK_SIZE);
+
+ /* For open band all lba map segments are already cached */
+ assert(band->state == FTL_BAND_STATE_PREP);
+ num_segments = spdk_divide_round_up(ftl_get_num_blocks_in_band(band->dev), FTL_NUM_LBA_IN_BLOCK);
+ ftl_lba_map_set_segment_state(&band->lba_map, 0, num_segments, FTL_LBA_MAP_SEG_CACHED);
+
+ lba_map->num_vld = 0;
+}
+
+size_t
+ftl_lba_map_pool_elem_size(struct spdk_ftl_dev *dev)
+{
+ /* Map pool element holds the whole tail md + segments map */
+ return ftl_tail_md_num_blocks(dev) * FTL_BLOCK_SIZE +
+ spdk_divide_round_up(ftl_get_num_blocks_in_band(dev), FTL_NUM_LBA_IN_BLOCK);
+}
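A worked example of the tail-metadata sizing performed by ftl_tail_md_hdr_num_blocks(), ftl_vld_map_num_blocks(), ftl_lba_map_num_blocks() and ftl_tail_md_num_blocks() above. The geometry is assumed purely for illustration (the real values come from ftl_get_num_blocks_in_band() and dev->xfer_size), and the valid bitmap is approximated as one bit per block:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EX_BLOCK_SIZE      4096ULL            /* FTL_BLOCK_SIZE */
#define EX_BLOCKS_IN_BAND  (1024ULL * 1024)   /* assumed blocks per band */
#define EX_XFER_SIZE       16ULL              /* assumed transfer size */

static uint64_t
ex_div_round_up(uint64_t num, uint64_t div)
{
	return (num + div - 1) / div;
}

int
main(void)
{
	/* Same layout ftl_pack_tail_md() writes: header, valid bitmap, LBA map. */
	uint64_t hdr = 1; /* sizeof(struct ftl_tail_md) == FTL_BLOCK_SIZE */
	uint64_t vld = ex_div_round_up(EX_BLOCKS_IN_BAND / 8, EX_BLOCK_SIZE);
	uint64_t lba = ex_div_round_up(EX_BLOCKS_IN_BAND * sizeof(uint64_t), EX_BLOCK_SIZE);

	/* ftl_tail_md_num_blocks() rounds the sum up to a multiple of xfer_size
	 * so the metadata can be written in whole transfer units. */
	uint64_t total = ex_div_round_up(hdr + vld + lba, EX_XFER_SIZE) * EX_XFER_SIZE;

	/* With the assumed geometry: hdr=1, vld=32, lba=2048 -> 2096 blocks. */
	printf("tail md blocks: %" PRIu64 "\n", total);
	return 0;
}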
diff --git a/src/spdk/lib/ftl/ftl_band.h b/src/spdk/lib/ftl/ftl_band.h
new file mode 100644
index 000000000..109b369a5
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_band.h
@@ -0,0 +1,287 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FTL_BAND_H
+#define FTL_BAND_H
+
+#include "spdk/stdinc.h"
+#include "spdk/bit_array.h"
+#include "spdk/queue.h"
+#include "spdk/bdev_zone.h"
+
+#include "ftl_io.h"
+#include "ftl_addr.h"
+#include "ftl_core.h"
+
+/* Number of LBAs that could be stored in a single block */
+#define FTL_NUM_LBA_IN_BLOCK (FTL_BLOCK_SIZE / sizeof(uint64_t))
+
+struct spdk_ftl_dev;
+struct ftl_lba_map_request;
+
+struct ftl_zone {
+ struct spdk_bdev_zone_info info;
+
+	/* Indicates that there is an inflight write */
+ bool busy;
+
+ CIRCLEQ_ENTRY(ftl_zone) circleq;
+};
+
+enum ftl_md_status {
+ FTL_MD_SUCCESS,
+ /* Metadata read failure */
+ FTL_MD_IO_FAILURE,
+ /* Invalid version */
+ FTL_MD_INVALID_VER,
+ /* UUID doesn't match */
+ FTL_MD_NO_MD,
+ /* UUID and version matches but CRC doesn't */
+ FTL_MD_INVALID_CRC,
+ /* Vld or lba map size doesn't match */
+ FTL_MD_INVALID_SIZE
+};
+
+enum ftl_lba_map_seg_state {
+ FTL_LBA_MAP_SEG_CLEAR,
+ FTL_LBA_MAP_SEG_PENDING,
+ FTL_LBA_MAP_SEG_CACHED
+};
+
+struct ftl_lba_map {
+ /* LBA/vld map lock */
+ pthread_spinlock_t lock;
+
+ /* Number of valid LBAs */
+ size_t num_vld;
+
+ /* LBA map's reference count */
+ size_t ref_cnt;
+
+ /* Bitmap of valid LBAs */
+ struct spdk_bit_array *vld;
+
+ /* LBA map (only valid for open/relocating bands) */
+ uint64_t *map;
+
+ /* LBA map segment state map (clear, pending, cached) */
+ uint8_t *segments;
+
+ LIST_HEAD(, ftl_lba_map_request) request_list;
+
+ /* Metadata DMA buffer (only valid for open/relocating bands) */
+ void *dma_buf;
+};
+
+enum ftl_band_state {
+ FTL_BAND_STATE_FREE,
+ FTL_BAND_STATE_PREP,
+ FTL_BAND_STATE_OPENING,
+ FTL_BAND_STATE_OPEN,
+ FTL_BAND_STATE_FULL,
+ FTL_BAND_STATE_CLOSING,
+ FTL_BAND_STATE_CLOSED,
+ FTL_BAND_STATE_MAX
+};
+
+struct ftl_lba_map_request {
+ /* Completion callback */
+ ftl_io_fn cb;
+
+ /* Completion callback context */
+ void *cb_ctx;
+
+ /* Bit array of requested segments */
+ struct spdk_bit_array *segments;
+
+ /* Number of pending segments to read */
+ size_t num_pending;
+
+ LIST_ENTRY(ftl_lba_map_request) list_entry;
+};
+
+struct ftl_band {
+ /* Device this band belongs to */
+ struct spdk_ftl_dev *dev;
+
+ /* Number of operational zones */
+ size_t num_zones;
+
+ /* Array of zones */
+ struct ftl_zone *zone_buf;
+
+ /* List of operational zones */
+ CIRCLEQ_HEAD(, ftl_zone) zones;
+
+ /* LBA map */
+ struct ftl_lba_map lba_map;
+
+ /* Band's state */
+ enum ftl_band_state state;
+
+ /* Band's index */
+ unsigned int id;
+
+ /* Latest merit calculation */
+ double merit;
+
+ /* High defrag priority - means that the metadata should be copied and */
+ /* the band should be defragged immediately */
+ int high_prio;
+
+ /* Sequence number */
+ uint64_t seq;
+
+ /* Number of defrag cycles */
+ uint64_t wr_cnt;
+
+ /* End metadata start addr */
+ struct ftl_addr tail_md_addr;
+
+	/* Bitmap of all bands that have had their data moved onto this band */
+ struct spdk_bit_array *reloc_bitmap;
+ /* Number of open bands containing data moved from this band */
+ size_t num_reloc_bands;
+ /* Number of blocks currently being moved from this band */
+ size_t num_reloc_blocks;
+
+ /* Free/shut bands' lists */
+ LIST_ENTRY(ftl_band) list_entry;
+
+ /* High priority queue link */
+ STAILQ_ENTRY(ftl_band) prio_stailq;
+};
+
+uint64_t ftl_band_block_offset_from_addr(struct ftl_band *band, struct ftl_addr addr);
+struct ftl_addr ftl_band_addr_from_block_offset(struct ftl_band *band, uint64_t block_off);
+void ftl_band_set_state(struct ftl_band *band, enum ftl_band_state state);
+size_t ftl_band_age(const struct ftl_band *band);
+void ftl_band_acquire_lba_map(struct ftl_band *band);
+int ftl_band_alloc_lba_map(struct ftl_band *band);
+void ftl_band_clear_lba_map(struct ftl_band *band);
+void ftl_band_release_lba_map(struct ftl_band *band);
+int ftl_band_read_lba_map(struct ftl_band *band,
+ size_t offset, size_t lba_cnt,
+ ftl_io_fn cb_fn, void *cb_ctx);
+struct ftl_addr ftl_band_next_xfer_addr(struct ftl_band *band, struct ftl_addr addr,
+ size_t num_blocks);
+struct ftl_addr ftl_band_next_addr(struct ftl_band *band, struct ftl_addr addr,
+ size_t offset);
+size_t ftl_band_num_usable_blocks(const struct ftl_band *band);
+size_t ftl_band_user_blocks_left(const struct ftl_band *band, size_t offset);
+size_t ftl_band_user_blocks(const struct ftl_band *band);
+void ftl_band_set_addr(struct ftl_band *band, uint64_t lba,
+ struct ftl_addr addr);
+struct ftl_band *ftl_band_from_addr(struct spdk_ftl_dev *dev, struct ftl_addr addr);
+struct ftl_zone *ftl_band_zone_from_addr(struct ftl_band *band, struct ftl_addr);
+void ftl_band_md_clear(struct ftl_band *band);
+int ftl_band_read_tail_md(struct ftl_band *band, struct ftl_addr,
+ ftl_io_fn cb_fn, void *cb_ctx);
+int ftl_band_read_head_md(struct ftl_band *band, ftl_io_fn cb_fn, void *cb_ctx);
+int ftl_band_write_tail_md(struct ftl_band *band, ftl_io_fn cb);
+int ftl_band_write_head_md(struct ftl_band *band, ftl_io_fn cb);
+struct ftl_addr ftl_band_tail_md_addr(struct ftl_band *band);
+struct ftl_addr ftl_band_head_md_addr(struct ftl_band *band);
+void ftl_band_write_failed(struct ftl_band *band);
+int ftl_band_full(struct ftl_band *band, size_t offset);
+int ftl_band_write_prep(struct ftl_band *band);
+struct ftl_zone *ftl_band_next_operational_zone(struct ftl_band *band,
+ struct ftl_zone *zone);
+size_t ftl_lba_map_pool_elem_size(struct spdk_ftl_dev *dev);
+void ftl_band_remove_zone(struct ftl_band *band, struct ftl_zone *zone);
+
+
+static inline int
+ftl_band_empty(const struct ftl_band *band)
+{
+ return band->lba_map.num_vld == 0;
+}
+
+static inline struct ftl_zone *
+ftl_band_next_zone(struct ftl_band *band, struct ftl_zone *zone)
+{
+ assert(zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE);
+ return CIRCLEQ_LOOP_NEXT(&band->zones, zone, circleq);
+}
+
+static inline void
+ftl_band_set_next_state(struct ftl_band *band)
+{
+ ftl_band_set_state(band, (band->state + 1) % FTL_BAND_STATE_MAX);
+}
+
+static inline int
+ftl_band_state_changing(struct ftl_band *band)
+{
+ return band->state == FTL_BAND_STATE_OPENING ||
+ band->state == FTL_BAND_STATE_CLOSING;
+}
+
+static inline int
+ftl_band_block_offset_valid(struct ftl_band *band, size_t block_off)
+{
+ struct ftl_lba_map *lba_map = &band->lba_map;
+
+ pthread_spin_lock(&lba_map->lock);
+ if (spdk_bit_array_get(lba_map->vld, block_off)) {
+ pthread_spin_unlock(&lba_map->lock);
+ return 1;
+ }
+
+ pthread_spin_unlock(&lba_map->lock);
+ return 0;
+}
+
+static inline int
+ftl_band_zone_is_last(struct ftl_band *band, struct ftl_zone *zone)
+{
+ return zone == CIRCLEQ_LAST(&band->zones);
+}
+
+static inline int
+ftl_band_zone_is_first(struct ftl_band *band, struct ftl_zone *zone)
+{
+ return zone == CIRCLEQ_FIRST(&band->zones);
+}
+
+static inline int
+ftl_zone_is_writable(const struct spdk_ftl_dev *dev, const struct ftl_zone *zone)
+{
+ bool busy = ftl_is_append_supported(dev) ? false : zone->busy;
+
+ return (zone->info.state == SPDK_BDEV_ZONE_STATE_OPEN ||
+ zone->info.state == SPDK_BDEV_ZONE_STATE_EMPTY) &&
+ !busy;
+}
+
+#endif /* FTL_BAND_H */
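ftl_band_set_state() and ftl_band_set_next_state() drive a simple per-band lifecycle; ftl_band_set_next_state() just advances to the following enum value modulo FTL_BAND_STATE_MAX. An illustrative summary of the normal forward order, with one-line descriptions distilled from the code in this patch (the sketch itself is not part of the sources):

#include <stddef.h>
#include <stdio.h>

static const char *ex_band_lifecycle[] = {
	"FREE    - erased, kept on dev->free_bands sorted by write count",
	"PREP    - taken off the free list, zones being reset for reuse",
	"OPENING - head metadata write in flight",
	"OPEN    - user data appended via the write pointer",
	"FULL    - user area exhausted, tail metadata about to be written",
	"CLOSING - tail metadata write in flight",
	"CLOSED  - on dev->shut_bands, eligible to become FREE again",
};

int
main(void)
{
	for (size_t i = 0; i < sizeof(ex_band_lifecycle) / sizeof(ex_band_lifecycle[0]); ++i) {
		printf("%zu: %s\n", i, ex_band_lifecycle[i]);
	}
	return 0;
}

ftl_band_set_state() additionally asserts the allowed predecessor state for the FREE, PREP and CLOSED transitions.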
diff --git a/src/spdk/lib/ftl/ftl_core.c b/src/spdk/lib/ftl/ftl_core.c
new file mode 100644
index 000000000..b0b448806
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_core.c
@@ -0,0 +1,2460 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/likely.h"
+#include "spdk/stdinc.h"
+#include "spdk/nvme.h"
+#include "spdk/thread.h"
+#include "spdk/bdev_module.h"
+#include "spdk/string.h"
+#include "spdk_internal/log.h"
+#include "spdk/ftl.h"
+#include "spdk/crc32.h"
+
+#include "ftl_core.h"
+#include "ftl_band.h"
+#include "ftl_io.h"
+#include "ftl_debug.h"
+#include "ftl_reloc.h"
+
+struct ftl_band_flush {
+ struct spdk_ftl_dev *dev;
+ /* Number of bands left to be flushed */
+ size_t num_bands;
+ /* User callback */
+ spdk_ftl_fn cb_fn;
+ /* Callback's argument */
+ void *cb_arg;
+ /* List link */
+ LIST_ENTRY(ftl_band_flush) list_entry;
+};
+
+struct ftl_wptr {
+ /* Owner device */
+ struct spdk_ftl_dev *dev;
+
+ /* Current address */
+ struct ftl_addr addr;
+
+ /* Band currently being written to */
+ struct ftl_band *band;
+
+ /* Current logical block's offset */
+ uint64_t offset;
+
+ /* Current zone */
+ struct ftl_zone *zone;
+
+ /* Pending IO queue */
+ TAILQ_HEAD(, ftl_io) pending_queue;
+
+ /* List link */
+ LIST_ENTRY(ftl_wptr) list_entry;
+
+ /*
+	 * If set up in direct mode, there will be no offset or band state update after IO.
+ * The zoned bdev address is not assigned by wptr, and is instead taken directly
+ * from the request.
+ */
+ bool direct_mode;
+
+ /* Number of outstanding write requests */
+ uint32_t num_outstanding;
+
+ /* Marks that the band related to this wptr needs to be closed as soon as possible */
+ bool flush;
+};
+
+struct ftl_flush {
+ /* Owner device */
+ struct spdk_ftl_dev *dev;
+
+ /* Number of batches to wait for */
+ size_t num_req;
+
+ /* Callback */
+ struct {
+ spdk_ftl_fn fn;
+ void *ctx;
+ } cb;
+
+ /* Batch bitmap */
+ struct spdk_bit_array *bmap;
+
+ /* List link */
+ LIST_ENTRY(ftl_flush) list_entry;
+};
+
+static void
+ftl_wptr_free(struct ftl_wptr *wptr)
+{
+ if (!wptr) {
+ return;
+ }
+
+ free(wptr);
+}
+
+static void
+ftl_remove_wptr(struct ftl_wptr *wptr)
+{
+ struct spdk_ftl_dev *dev = wptr->dev;
+ struct ftl_band_flush *flush, *tmp;
+
+ if (spdk_unlikely(wptr->flush)) {
+ LIST_FOREACH_SAFE(flush, &dev->band_flush_list, list_entry, tmp) {
+ assert(flush->num_bands > 0);
+ if (--flush->num_bands == 0) {
+ flush->cb_fn(flush->cb_arg, 0);
+ LIST_REMOVE(flush, list_entry);
+ free(flush);
+ }
+ }
+ }
+
+ LIST_REMOVE(wptr, list_entry);
+ ftl_wptr_free(wptr);
+}
+
+static struct ftl_wbuf_entry *
+ftl_acquire_wbuf_entry(struct ftl_io_channel *io_channel, int io_flags)
+{
+ struct ftl_wbuf_entry *entry = NULL;
+ uint32_t qdepth;
+
+ if (!(io_flags & FTL_IO_INTERNAL)) {
+ qdepth = __atomic_fetch_add(&io_channel->qdepth_current, 1, __ATOMIC_SEQ_CST);
+ if (qdepth >= io_channel->qdepth_limit) {
+ __atomic_fetch_sub(&io_channel->qdepth_current, 1, __ATOMIC_SEQ_CST);
+ return NULL;
+ }
+ }
+
+ if (spdk_ring_dequeue(io_channel->free_queue, (void **)&entry, 1) != 1) {
+ if (!(io_flags & FTL_IO_INTERNAL)) {
+ __atomic_fetch_sub(&io_channel->qdepth_current, 1, __ATOMIC_SEQ_CST);
+ }
+
+ return NULL;
+ }
+
+ assert(entry != NULL);
+
+ ftl_evict_cache_entry(io_channel->dev, entry);
+
+ entry->io_flags = io_flags;
+ entry->addr.offset = FTL_ADDR_INVALID;
+ entry->lba = FTL_LBA_INVALID;
+ entry->band = NULL;
+ entry->valid = false;
+
+ return entry;
+}
+
+static void
+ftl_release_wbuf_entry(struct ftl_wbuf_entry *entry)
+{
+ struct ftl_io_channel *io_channel = entry->ioch;
+
+ if (!(entry->io_flags & FTL_IO_INTERNAL)) {
+ __atomic_fetch_sub(&io_channel->qdepth_current, 1, __ATOMIC_SEQ_CST);
+ }
+
+ spdk_ring_enqueue(io_channel->free_queue, (void **)&entry, 1, NULL);
+}
+
+static struct ftl_batch *
+ftl_get_next_batch(struct spdk_ftl_dev *dev)
+{
+ struct ftl_batch *batch = dev->current_batch;
+ struct ftl_io_channel *ioch;
+#define FTL_DEQUEUE_ENTRIES 128
+ struct ftl_wbuf_entry *entries[FTL_DEQUEUE_ENTRIES];
+ TAILQ_HEAD(, ftl_io_channel) ioch_queue;
+ size_t i, num_dequeued, num_remaining;
+ uint64_t *metadata;
+
+ if (batch == NULL) {
+ batch = TAILQ_FIRST(&dev->pending_batches);
+ if (batch != NULL) {
+ TAILQ_REMOVE(&dev->pending_batches, batch, tailq);
+ return batch;
+ }
+
+ batch = TAILQ_FIRST(&dev->free_batches);
+ if (spdk_unlikely(batch == NULL)) {
+ return NULL;
+ }
+
+ assert(TAILQ_EMPTY(&batch->entries));
+ assert(batch->num_entries == 0);
+ TAILQ_REMOVE(&dev->free_batches, batch, tailq);
+ }
+
+ /*
+ * Keep shifting the queue to ensure fairness in IO channel selection. Each time
+ * ftl_get_next_batch() is called, we're starting to dequeue write buffer entries from a
+ * different IO channel.
+ */
+ TAILQ_INIT(&ioch_queue);
+ while (!TAILQ_EMPTY(&dev->ioch_queue)) {
+ ioch = TAILQ_FIRST(&dev->ioch_queue);
+ TAILQ_REMOVE(&dev->ioch_queue, ioch, tailq);
+ TAILQ_INSERT_TAIL(&ioch_queue, ioch, tailq);
+
+ num_remaining = dev->xfer_size - batch->num_entries;
+ while (num_remaining > 0) {
+ num_dequeued = spdk_ring_dequeue(ioch->submit_queue, (void **)entries,
+ spdk_min(num_remaining,
+ FTL_DEQUEUE_ENTRIES));
+ if (num_dequeued == 0) {
+ break;
+ }
+
+ for (i = 0; i < num_dequeued; ++i) {
+ batch->iov[batch->num_entries + i].iov_base = entries[i]->payload;
+ batch->iov[batch->num_entries + i].iov_len = FTL_BLOCK_SIZE;
+
+ if (batch->metadata != NULL) {
+ metadata = (uint64_t *)((char *)batch->metadata +
+ i * dev->md_size);
+ *metadata = entries[i]->lba;
+ }
+
+ TAILQ_INSERT_TAIL(&batch->entries, entries[i], tailq);
+ }
+
+ batch->num_entries += num_dequeued;
+ num_remaining -= num_dequeued;
+ }
+
+ if (num_remaining == 0) {
+ break;
+ }
+ }
+
+ TAILQ_CONCAT(&dev->ioch_queue, &ioch_queue, tailq);
+
+ if (batch->num_entries == dev->xfer_size) {
+ dev->current_batch = NULL;
+ } else {
+ dev->current_batch = batch;
+ batch = NULL;
+ }
+
+ return batch;
+}
+
+static void
+ftl_release_batch(struct spdk_ftl_dev *dev, struct ftl_batch *batch)
+{
+ struct ftl_wbuf_entry *entry;
+
+ while (!TAILQ_EMPTY(&batch->entries)) {
+ entry = TAILQ_FIRST(&batch->entries);
+ TAILQ_REMOVE(&batch->entries, entry, tailq);
+ ftl_release_wbuf_entry(entry);
+ }
+
+ batch->num_entries = 0;
+ TAILQ_INSERT_TAIL(&dev->free_batches, batch, tailq);
+}
+
+static struct ftl_wbuf_entry *
+ftl_get_entry_from_addr(struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+ struct ftl_io_channel *ioch;
+ uint64_t ioch_offset, entry_offset;
+
+ ioch_offset = addr.cache_offset & ((1 << dev->ioch_shift) - 1);
+ entry_offset = addr.cache_offset >> dev->ioch_shift;
+ ioch = dev->ioch_array[ioch_offset];
+
+ assert(ioch_offset < dev->conf.max_io_channels);
+ assert(entry_offset < ioch->num_entries);
+ assert(addr.cached == 1);
+
+ return &ioch->wbuf_entries[entry_offset];
+}
+
+static struct ftl_addr
+ftl_get_addr_from_entry(struct ftl_wbuf_entry *entry)
+{
+ struct ftl_io_channel *ioch = entry->ioch;
+ struct ftl_addr addr = {};
+
+ addr.cached = 1;
+ addr.cache_offset = (uint64_t)entry->index << ioch->dev->ioch_shift | ioch->index;
+
+ return addr;
+}
+
+static void
+ftl_io_cmpl_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_io *io = cb_arg;
+ struct spdk_ftl_dev *dev = io->dev;
+
+ if (spdk_unlikely(!success)) {
+ io->status = -EIO;
+ }
+
+ ftl_trace_completion(dev, io, FTL_TRACE_COMPLETION_DISK);
+
+ if (io->type == FTL_IO_WRITE && ftl_is_append_supported(dev)) {
+ assert(io->parent);
+ io->parent->addr.offset = spdk_bdev_io_get_append_location(bdev_io);
+ }
+
+ ftl_io_dec_req(io);
+ if (ftl_io_done(io)) {
+ ftl_io_complete(io);
+ }
+
+ spdk_bdev_free_io(bdev_io);
+}
+
+static void
+ftl_halt_writes(struct spdk_ftl_dev *dev, struct ftl_band *band)
+{
+ struct ftl_wptr *wptr = NULL;
+
+ LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
+ if (wptr->band == band) {
+ break;
+ }
+ }
+
+ /* If the band already has the high_prio flag set, other writes must */
+ /* have failed earlier, so it's already taken care of. */
+ if (band->high_prio) {
+ assert(wptr == NULL);
+ return;
+ }
+
+ ftl_band_write_failed(band);
+ ftl_remove_wptr(wptr);
+}
+
+static struct ftl_wptr *
+ftl_wptr_from_band(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_wptr *wptr = NULL;
+
+ LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
+ if (wptr->band == band) {
+ return wptr;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+ftl_md_write_fail(struct ftl_io *io, int status)
+{
+ struct ftl_band *band = io->band;
+ struct ftl_wptr *wptr;
+ char buf[128];
+
+ wptr = ftl_wptr_from_band(band);
+ assert(wptr);
+
+ SPDK_ERRLOG("Metadata write failed @addr: %s, status: %d\n",
+ ftl_addr2str(wptr->addr, buf, sizeof(buf)), status);
+
+ ftl_halt_writes(io->dev, band);
+}
+
+static void
+ftl_md_write_cb(struct ftl_io *io, void *arg, int status)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_nv_cache *nv_cache = &dev->nv_cache;
+ struct ftl_band *band = io->band;
+ struct ftl_wptr *wptr;
+ size_t id;
+
+ wptr = ftl_wptr_from_band(band);
+ assert(wptr);
+
+ if (status) {
+ ftl_md_write_fail(io, status);
+ return;
+ }
+
+ ftl_band_set_next_state(band);
+ if (band->state == FTL_BAND_STATE_CLOSED) {
+ if (ftl_dev_has_nv_cache(dev)) {
+ pthread_spin_lock(&nv_cache->lock);
+ nv_cache->num_available += ftl_band_user_blocks(band);
+
+ if (spdk_unlikely(nv_cache->num_available > nv_cache->num_data_blocks)) {
+ nv_cache->num_available = nv_cache->num_data_blocks;
+ }
+ pthread_spin_unlock(&nv_cache->lock);
+ }
+
+ /*
+	 * Go through the reloc_bitmap, find all the bands that had their data moved
+	 * onto the current band, and update their counters to allow them to be used
+	 * for writing (once they're closed and empty).
+ */
+ for (id = 0; id < ftl_get_num_bands(dev); ++id) {
+ if (spdk_bit_array_get(band->reloc_bitmap, id)) {
+ assert(dev->bands[id].num_reloc_bands > 0);
+ dev->bands[id].num_reloc_bands--;
+
+ spdk_bit_array_clear(band->reloc_bitmap, id);
+ }
+ }
+
+ ftl_remove_wptr(wptr);
+ }
+}
+
+static int
+ftl_read_next_physical_addr(struct ftl_io *io, struct ftl_addr *addr)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ size_t num_blocks, max_blocks;
+
+ assert(ftl_io_mode_physical(io));
+ assert(io->iov_pos < io->iov_cnt);
+
+ if (io->pos == 0) {
+ *addr = io->addr;
+ } else {
+ *addr = ftl_band_next_xfer_addr(io->band, io->addr, io->pos);
+ }
+
+ assert(!ftl_addr_invalid(*addr));
+
+ /* Metadata has to be read in the way it's written (jumping across */
+ /* the zones in xfer_size increments) */
+ if (io->flags & FTL_IO_MD) {
+ max_blocks = dev->xfer_size - (addr->offset % dev->xfer_size);
+ num_blocks = spdk_min(ftl_io_iovec_len_left(io), max_blocks);
+ assert(addr->offset / dev->xfer_size ==
+ (addr->offset + num_blocks - 1) / dev->xfer_size);
+ } else {
+ num_blocks = ftl_io_iovec_len_left(io);
+ }
+
+ return num_blocks;
+}
+
+static int
+ftl_wptr_close_band(struct ftl_wptr *wptr)
+{
+ struct ftl_band *band = wptr->band;
+
+ ftl_band_set_state(band, FTL_BAND_STATE_CLOSING);
+
+ return ftl_band_write_tail_md(band, ftl_md_write_cb);
+}
+
+static int
+ftl_wptr_open_band(struct ftl_wptr *wptr)
+{
+ struct ftl_band *band = wptr->band;
+
+ assert(ftl_band_zone_is_first(band, wptr->zone));
+ assert(band->lba_map.num_vld == 0);
+
+ ftl_band_clear_lba_map(band);
+
+ assert(band->state == FTL_BAND_STATE_PREP);
+ ftl_band_set_state(band, FTL_BAND_STATE_OPENING);
+
+ return ftl_band_write_head_md(band, ftl_md_write_cb);
+}
+
+static int
+ftl_submit_erase(struct ftl_io *io)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_band *band = io->band;
+ struct ftl_addr addr = io->addr;
+ struct ftl_io_channel *ioch;
+ struct ftl_zone *zone;
+ int rc = 0;
+ size_t i;
+
+ ioch = ftl_io_channel_get_ctx(ftl_get_io_channel(dev));
+
+ for (i = 0; i < io->num_blocks; ++i) {
+ if (i != 0) {
+ zone = ftl_band_next_zone(band, ftl_band_zone_from_addr(band, addr));
+ assert(zone->info.state == SPDK_BDEV_ZONE_STATE_FULL);
+ addr.offset = zone->info.zone_id;
+ }
+
+ assert(ftl_addr_get_zone_offset(dev, addr) == 0);
+
+ ftl_trace_submission(dev, io, addr, 1);
+ rc = spdk_bdev_zone_management(dev->base_bdev_desc, ioch->base_ioch, addr.offset,
+ SPDK_BDEV_ZONE_RESET, ftl_io_cmpl_cb, io);
+ if (spdk_unlikely(rc)) {
+ ftl_io_fail(io, rc);
+ SPDK_ERRLOG("Vector reset failed with status: %d\n", rc);
+ break;
+ }
+
+ ftl_io_inc_req(io);
+ ftl_io_advance(io, 1);
+ }
+
+ if (ftl_io_done(io)) {
+ ftl_io_complete(io);
+ }
+
+ return rc;
+}
+
+static bool
+ftl_check_core_thread(const struct spdk_ftl_dev *dev)
+{
+ return dev->core_thread == spdk_get_thread();
+}
+
+struct spdk_io_channel *
+ftl_get_io_channel(const struct spdk_ftl_dev *dev)
+{
+ if (ftl_check_core_thread(dev)) {
+ return dev->ioch;
+ }
+
+ return NULL;
+}
+
+static void
+ftl_erase_fail(struct ftl_io *io, int status)
+{
+ struct ftl_zone *zone;
+ struct ftl_band *band = io->band;
+ char buf[128];
+
+ SPDK_ERRLOG("Erase failed at address: %s, status: %d\n",
+ ftl_addr2str(io->addr, buf, sizeof(buf)), status);
+
+ zone = ftl_band_zone_from_addr(band, io->addr);
+ zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
+ ftl_band_remove_zone(band, zone);
+ band->tail_md_addr = ftl_band_tail_md_addr(band);
+}
+
+static void
+ftl_zone_erase_cb(struct ftl_io *io, void *ctx, int status)
+{
+ struct ftl_zone *zone;
+
+ zone = ftl_band_zone_from_addr(io->band, io->addr);
+ zone->busy = false;
+
+ if (spdk_unlikely(status)) {
+ ftl_erase_fail(io, status);
+ return;
+ }
+
+ zone->info.state = SPDK_BDEV_ZONE_STATE_EMPTY;
+ zone->info.write_pointer = zone->info.zone_id;
+}
+
+static int
+ftl_band_erase(struct ftl_band *band)
+{
+ struct ftl_zone *zone;
+ struct ftl_io *io;
+ int rc = 0;
+
+ assert(band->state == FTL_BAND_STATE_CLOSED ||
+ band->state == FTL_BAND_STATE_FREE);
+
+ ftl_band_set_state(band, FTL_BAND_STATE_PREP);
+
+ CIRCLEQ_FOREACH(zone, &band->zones, circleq) {
+ if (zone->info.state == SPDK_BDEV_ZONE_STATE_EMPTY) {
+ continue;
+ }
+
+ io = ftl_io_erase_init(band, 1, ftl_zone_erase_cb);
+ if (!io) {
+ rc = -ENOMEM;
+ break;
+ }
+
+ zone->busy = true;
+ io->addr.offset = zone->info.zone_id;
+ rc = ftl_submit_erase(io);
+ if (rc) {
+ zone->busy = false;
+ assert(0);
+ /* TODO: change band's state back to close? */
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static struct ftl_band *
+ftl_next_write_band(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band;
+
+ /* Find a free band that has all of its data moved onto other closed bands */
+ LIST_FOREACH(band, &dev->free_bands, list_entry) {
+ assert(band->state == FTL_BAND_STATE_FREE);
+ if (band->num_reloc_bands == 0 && band->num_reloc_blocks == 0) {
+ break;
+ }
+ }
+
+ if (spdk_unlikely(!band)) {
+ return NULL;
+ }
+
+ if (ftl_band_erase(band)) {
+ /* TODO: handle erase failure */
+ return NULL;
+ }
+
+ return band;
+}
+
+static struct ftl_band *
+ftl_next_wptr_band(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band;
+
+ if (!dev->next_band) {
+ band = ftl_next_write_band(dev);
+ } else {
+ assert(dev->next_band->state == FTL_BAND_STATE_PREP);
+ band = dev->next_band;
+ dev->next_band = NULL;
+ }
+
+ return band;
+}
+
+static struct ftl_wptr *
+ftl_wptr_init(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_wptr *wptr;
+
+ wptr = calloc(1, sizeof(*wptr));
+ if (!wptr) {
+ return NULL;
+ }
+
+ wptr->dev = dev;
+ wptr->band = band;
+ wptr->zone = CIRCLEQ_FIRST(&band->zones);
+ wptr->addr.offset = wptr->zone->info.zone_id;
+ TAILQ_INIT(&wptr->pending_queue);
+
+ return wptr;
+}
+
+static int
+ftl_add_direct_wptr(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_wptr *wptr;
+
+ assert(band->state == FTL_BAND_STATE_OPEN);
+
+ wptr = ftl_wptr_init(band);
+ if (!wptr) {
+ return -1;
+ }
+
+ wptr->direct_mode = true;
+
+ if (ftl_band_alloc_lba_map(band)) {
+ ftl_wptr_free(wptr);
+ return -1;
+ }
+
+ LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);
+
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: direct band %u\n", band->id);
+ ftl_trace_write_band(dev, band);
+ return 0;
+}
+
+static void
+ftl_close_direct_wptr(struct ftl_band *band)
+{
+ struct ftl_wptr *wptr = ftl_wptr_from_band(band);
+
+ assert(wptr);
+ assert(wptr->direct_mode);
+ assert(band->state == FTL_BAND_STATE_CLOSED);
+
+ ftl_band_release_lba_map(band);
+
+ ftl_remove_wptr(wptr);
+}
+
+int
+ftl_band_set_direct_access(struct ftl_band *band, bool access)
+{
+ if (access) {
+ return ftl_add_direct_wptr(band);
+ } else {
+ ftl_close_direct_wptr(band);
+ return 0;
+ }
+}
+
+static int
+ftl_add_wptr(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band;
+ struct ftl_wptr *wptr;
+
+ band = ftl_next_wptr_band(dev);
+ if (!band) {
+ return -1;
+ }
+
+ wptr = ftl_wptr_init(band);
+ if (!wptr) {
+ return -1;
+ }
+
+ if (ftl_band_write_prep(band)) {
+ ftl_wptr_free(wptr);
+ return -1;
+ }
+
+ LIST_INSERT_HEAD(&dev->wptr_list, wptr, list_entry);
+
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: band %u\n", band->id);
+ ftl_trace_write_band(dev, band);
+ return 0;
+}
+
+static void
+ftl_wptr_advance(struct ftl_wptr *wptr, size_t xfer_size)
+{
+ struct ftl_band *band = wptr->band;
+ struct spdk_ftl_dev *dev = wptr->dev;
+ struct spdk_ftl_conf *conf = &dev->conf;
+ size_t next_thld;
+
+ if (spdk_unlikely(wptr->direct_mode)) {
+ return;
+ }
+
+ wptr->offset += xfer_size;
+ next_thld = (ftl_band_num_usable_blocks(band) * conf->band_thld) / 100;
+
+ if (ftl_band_full(band, wptr->offset)) {
+ ftl_band_set_state(band, FTL_BAND_STATE_FULL);
+ }
+
+ wptr->zone->busy = true;
+ wptr->addr = ftl_band_next_xfer_addr(band, wptr->addr, xfer_size);
+ wptr->zone = ftl_band_next_operational_zone(band, wptr->zone);
+
+ assert(!ftl_addr_invalid(wptr->addr));
+
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "wptr: pu:%lu band:%lu, offset:%lu\n",
+ ftl_addr_get_punit(dev, wptr->addr),
+ ftl_addr_get_band(dev, wptr->addr),
+ wptr->addr.offset);
+
+ if (wptr->offset >= next_thld && !dev->next_band) {
+ dev->next_band = ftl_next_write_band(dev);
+ }
+}
+
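+/*
+ * Illustrative numbers for the threshold above (editor's note, values are
+ * hypothetical): for a band with 49152 usable blocks and a band_thld of 75,
+ * next_thld is (49152 * 75) / 100 = 36864, so once the write pointer advances past
+ * 36864 blocks the next band is picked and erased ahead of time via
+ * ftl_next_write_band().
+ */
+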
+static size_t
+ftl_wptr_user_blocks_left(const struct ftl_wptr *wptr)
+{
+ return ftl_band_user_blocks_left(wptr->band, wptr->offset);
+}
+
+static bool
+ftl_wptr_ready(struct ftl_wptr *wptr)
+{
+ struct ftl_band *band = wptr->band;
+
+ /* TODO: add handling of empty bands */
+
+ if (spdk_unlikely(!ftl_zone_is_writable(wptr->dev, wptr->zone))) {
+		/* Erasing a band may fail after it has been assigned to the wptr. */
+ if (spdk_unlikely(wptr->zone->info.state == SPDK_BDEV_ZONE_STATE_OFFLINE)) {
+ ftl_wptr_advance(wptr, wptr->dev->xfer_size);
+ }
+ return false;
+ }
+
+ /* If we're in the process of writing metadata, wait till it is */
+ /* completed. */
+ /* TODO: we should probably change bands once we're writing tail md */
+ if (ftl_band_state_changing(band)) {
+ return false;
+ }
+
+ if (band->state == FTL_BAND_STATE_FULL) {
+ if (wptr->num_outstanding == 0) {
+ if (ftl_wptr_close_band(wptr)) {
+ /* TODO: need recovery here */
+ assert(false);
+ }
+ }
+
+ return false;
+ }
+
+ if (band->state != FTL_BAND_STATE_OPEN) {
+ if (ftl_wptr_open_band(wptr)) {
+ /* TODO: need recovery here */
+ assert(false);
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
+int
+ftl_flush_active_bands(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
+{
+ struct ftl_wptr *wptr;
+ struct ftl_band_flush *flush;
+
+ assert(ftl_get_core_thread(dev) == spdk_get_thread());
+
+ flush = calloc(1, sizeof(*flush));
+ if (spdk_unlikely(!flush)) {
+ return -ENOMEM;
+ }
+
+ LIST_INSERT_HEAD(&dev->band_flush_list, flush, list_entry);
+
+ flush->cb_fn = cb_fn;
+ flush->cb_arg = cb_arg;
+ flush->dev = dev;
+
+ LIST_FOREACH(wptr, &dev->wptr_list, list_entry) {
+ wptr->flush = true;
+ flush->num_bands++;
+ }
+
+ return 0;
+}
+
+static const struct spdk_ftl_limit *
+ftl_get_limit(const struct spdk_ftl_dev *dev, int type)
+{
+ assert(type < SPDK_FTL_LIMIT_MAX);
+ return &dev->conf.limits[type];
+}
+
+static bool
+ftl_cache_lba_valid(struct spdk_ftl_dev *dev, struct ftl_wbuf_entry *entry)
+{
+ struct ftl_addr addr;
+
+ /* If the LBA is invalid don't bother checking the md and l2p */
+ if (spdk_unlikely(entry->lba == FTL_LBA_INVALID)) {
+ return false;
+ }
+
+ addr = ftl_l2p_get(dev, entry->lba);
+ if (!(ftl_addr_cached(addr) && entry == ftl_get_entry_from_addr(dev, addr))) {
+ return false;
+ }
+
+ return true;
+}
+
+void
+ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_wbuf_entry *entry)
+{
+ pthread_spin_lock(&entry->lock);
+
+ if (!entry->valid) {
+ goto unlock;
+ }
+
+ /* If the l2p wasn't updated and still points at the entry, fill it with the */
+ /* on-disk address and clear the cache status bit. Otherwise, skip the l2p update */
+ /* and just clear the cache status. */
+ if (!ftl_cache_lba_valid(dev, entry)) {
+ goto clear;
+ }
+
+ ftl_l2p_set(dev, entry->lba, entry->addr);
+clear:
+ entry->valid = false;
+unlock:
+ pthread_spin_unlock(&entry->lock);
+}
+
+static void
+ftl_pad_wbuf(struct spdk_ftl_dev *dev, size_t size)
+{
+ struct ftl_wbuf_entry *entry;
+ struct ftl_io_channel *ioch;
+ int flags = FTL_IO_PAD | FTL_IO_INTERNAL;
+
+ ioch = ftl_io_channel_get_ctx(ftl_get_io_channel(dev));
+
+ for (size_t i = 0; i < size; ++i) {
+ entry = ftl_acquire_wbuf_entry(ioch, flags);
+ if (!entry) {
+ break;
+ }
+
+ entry->lba = FTL_LBA_INVALID;
+ entry->addr = ftl_to_addr(FTL_ADDR_INVALID);
+ memset(entry->payload, 0, FTL_BLOCK_SIZE);
+
+ spdk_ring_enqueue(ioch->submit_queue, (void **)&entry, 1, NULL);
+ }
+}
+
+static void
+ftl_remove_free_bands(struct spdk_ftl_dev *dev)
+{
+ while (!LIST_EMPTY(&dev->free_bands)) {
+ LIST_REMOVE(LIST_FIRST(&dev->free_bands), list_entry);
+ }
+
+ dev->next_band = NULL;
+}
+
+static void
+ftl_wptr_pad_band(struct ftl_wptr *wptr)
+{
+ struct spdk_ftl_dev *dev = wptr->dev;
+ struct ftl_batch *batch = dev->current_batch;
+ struct ftl_io_channel *ioch;
+ size_t size, pad_size, blocks_left;
+
+ size = batch != NULL ? batch->num_entries : 0;
+ TAILQ_FOREACH(ioch, &dev->ioch_queue, tailq) {
+ size += spdk_ring_count(ioch->submit_queue);
+ }
+
+ ioch = ftl_io_channel_get_ctx(ftl_get_io_channel(dev));
+
+ blocks_left = ftl_wptr_user_blocks_left(wptr);
+ assert(size <= blocks_left);
+ assert(blocks_left % dev->xfer_size == 0);
+ pad_size = spdk_min(blocks_left - size, spdk_ring_count(ioch->free_queue));
+
+ ftl_pad_wbuf(dev, pad_size);
+}
+
+static void
+ftl_wptr_process_shutdown(struct ftl_wptr *wptr)
+{
+ struct spdk_ftl_dev *dev = wptr->dev;
+ struct ftl_batch *batch = dev->current_batch;
+ struct ftl_io_channel *ioch;
+ size_t size;
+
+ size = batch != NULL ? batch->num_entries : 0;
+ TAILQ_FOREACH(ioch, &dev->ioch_queue, tailq) {
+ size += spdk_ring_count(ioch->submit_queue);
+ }
+
+ if (size >= dev->xfer_size) {
+ return;
+ }
+
+ /* If we reach this point we need to remove free bands */
+ /* and pad current wptr band to the end */
+ ftl_remove_free_bands(dev);
+ ftl_wptr_pad_band(wptr);
+}
+
+static int
+ftl_shutdown_complete(struct spdk_ftl_dev *dev)
+{
+ struct ftl_io_channel *ioch = ftl_io_channel_get_ctx(dev->ioch);
+
+ return !__atomic_load_n(&dev->num_inflight, __ATOMIC_SEQ_CST) &&
+ dev->num_io_channels == 1 && LIST_EMPTY(&dev->wptr_list) &&
+ TAILQ_EMPTY(&ioch->retry_queue);
+}
+
+void
+ftl_apply_limits(struct spdk_ftl_dev *dev)
+{
+ const struct spdk_ftl_limit *limit;
+ struct ftl_io_channel *ioch;
+ struct ftl_stats *stats = &dev->stats;
+ uint32_t qdepth_limit = 100;
+ int i;
+
+ /* Clear existing limit */
+ dev->limit = SPDK_FTL_LIMIT_MAX;
+
+ for (i = SPDK_FTL_LIMIT_CRIT; i < SPDK_FTL_LIMIT_MAX; ++i) {
+ limit = ftl_get_limit(dev, i);
+
+ if (dev->num_free <= limit->thld) {
+ qdepth_limit = limit->limit;
+ stats->limits[i]++;
+ dev->limit = i;
+ break;
+ }
+ }
+
+ ftl_trace_limits(dev, dev->limit, dev->num_free);
+ TAILQ_FOREACH(ioch, &dev->ioch_queue, tailq) {
+ __atomic_store_n(&ioch->qdepth_limit, (qdepth_limit * ioch->num_entries) / 100,
+ __ATOMIC_SEQ_CST);
+ }
+}
+
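+/*
+ * Editor's worked example of the scaling above: if the matched limit allows 20% of
+ * the queue depth, an IO channel with 512 write buffer entries ends up with a
+ * qdepth_limit of (20 * 512) / 100 = 102 entries.
+ */
+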
+static int
+ftl_invalidate_addr_unlocked(struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+ struct ftl_band *band = ftl_band_from_addr(dev, addr);
+ struct ftl_lba_map *lba_map = &band->lba_map;
+ uint64_t offset;
+
+ offset = ftl_band_block_offset_from_addr(band, addr);
+
+	/* The bit might already be cleared if two writes are scheduled to the */
+	/* same LBA at the same time */
+ if (spdk_bit_array_get(lba_map->vld, offset)) {
+ assert(lba_map->num_vld > 0);
+ spdk_bit_array_clear(lba_map->vld, offset);
+ lba_map->num_vld--;
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+ struct ftl_band *band;
+ int rc;
+
+ assert(!ftl_addr_cached(addr));
+ band = ftl_band_from_addr(dev, addr);
+
+ pthread_spin_lock(&band->lba_map.lock);
+ rc = ftl_invalidate_addr_unlocked(dev, addr);
+ pthread_spin_unlock(&band->lba_map.lock);
+
+ return rc;
+}
+
+static int
+ftl_read_retry(int rc)
+{
+ return rc == -EAGAIN;
+}
+
+static int
+ftl_read_canceled(int rc)
+{
+ return rc == -EFAULT || rc == 0;
+}
+
+static int
+ftl_cache_read(struct ftl_io *io, uint64_t lba,
+ struct ftl_addr addr, void *buf)
+{
+ struct ftl_wbuf_entry *entry;
+ struct ftl_addr naddr;
+ int rc = 0;
+
+ entry = ftl_get_entry_from_addr(io->dev, addr);
+ pthread_spin_lock(&entry->lock);
+
+ naddr = ftl_l2p_get(io->dev, lba);
+ if (addr.offset != naddr.offset) {
+ rc = -1;
+ goto out;
+ }
+
+ memcpy(buf, entry->payload, FTL_BLOCK_SIZE);
+out:
+ pthread_spin_unlock(&entry->lock);
+ return rc;
+}
+
+static int
+ftl_read_next_logical_addr(struct ftl_io *io, struct ftl_addr *addr)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_addr next_addr;
+ size_t i;
+
+ *addr = ftl_l2p_get(dev, ftl_io_current_lba(io));
+
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Read addr:%lx, lba:%lu\n",
+ addr->offset, ftl_io_current_lba(io));
+
+ /* If the address is invalid, skip it (the buffer should already be zero'ed) */
+ if (ftl_addr_invalid(*addr)) {
+ return -EFAULT;
+ }
+
+ if (ftl_addr_cached(*addr)) {
+ if (!ftl_cache_read(io, ftl_io_current_lba(io), *addr, ftl_io_iovec_addr(io))) {
+ return 0;
+ }
+
+ /* If the state changed, we have to re-read the l2p */
+ return -EAGAIN;
+ }
+
+ for (i = 1; i < ftl_io_iovec_len_left(io); ++i) {
+ next_addr = ftl_l2p_get(dev, ftl_io_get_lba(io, io->pos + i));
+
+ if (ftl_addr_invalid(next_addr) || ftl_addr_cached(next_addr)) {
+ break;
+ }
+
+ if (addr->offset + i != next_addr.offset) {
+ break;
+ }
+ }
+
+ return i;
+}
+
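+/*
+ * Editor's illustration of the coalescing loop above (addresses are hypothetical):
+ * if LBAs 10, 11, 12 and 13 map to physical offsets 5000, 5001, 5002 and 5010, the
+ * call returns 3, letting ftl_submit_read() issue offsets 5000-5002 as a single
+ * bdev read; LBA 13 is handled on the next iteration of the submission loop.
+ */
+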
+static int
+ftl_submit_read(struct ftl_io *io)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_io_channel *ioch;
+ struct ftl_addr addr;
+ int rc = 0, num_blocks;
+
+ ioch = ftl_io_channel_get_ctx(io->ioch);
+
+ assert(LIST_EMPTY(&io->children));
+
+ while (io->pos < io->num_blocks) {
+ if (ftl_io_mode_physical(io)) {
+ num_blocks = rc = ftl_read_next_physical_addr(io, &addr);
+ } else {
+ num_blocks = rc = ftl_read_next_logical_addr(io, &addr);
+ }
+
+		/* We might need to retry the read from scratch (e.g. */
+		/* because a write was under way and completed before */
+		/* we could read it from the write buffer) */
+ if (ftl_read_retry(rc)) {
+ continue;
+ }
+
+ /* We don't have to schedule the read, as it was read from cache */
+ if (ftl_read_canceled(rc)) {
+ ftl_io_advance(io, 1);
+ ftl_trace_completion(io->dev, io, rc ? FTL_TRACE_COMPLETION_INVALID :
+ FTL_TRACE_COMPLETION_CACHE);
+ rc = 0;
+ continue;
+ }
+
+ assert(num_blocks > 0);
+
+ ftl_trace_submission(dev, io, addr, num_blocks);
+ rc = spdk_bdev_read_blocks(dev->base_bdev_desc, ioch->base_ioch,
+ ftl_io_iovec_addr(io),
+ addr.offset,
+ num_blocks, ftl_io_cmpl_cb, io);
+ if (spdk_unlikely(rc)) {
+ if (rc == -ENOMEM) {
+ TAILQ_INSERT_TAIL(&ioch->retry_queue, io, ioch_entry);
+ rc = 0;
+ } else {
+ ftl_io_fail(io, rc);
+ }
+ break;
+ }
+
+ ftl_io_inc_req(io);
+ ftl_io_advance(io, num_blocks);
+ }
+
+ /* If we didn't have to read anything from the device, */
+ /* complete the request right away */
+ if (ftl_io_done(io)) {
+ ftl_io_complete(io);
+ }
+
+ return rc;
+}
+
+static void
+ftl_complete_flush(struct ftl_flush *flush)
+{
+ assert(flush->num_req == 0);
+ LIST_REMOVE(flush, list_entry);
+
+ flush->cb.fn(flush->cb.ctx, 0);
+
+ spdk_bit_array_free(&flush->bmap);
+ free(flush);
+}
+
+static void
+ftl_process_flush(struct spdk_ftl_dev *dev, struct ftl_batch *batch)
+{
+ struct ftl_flush *flush, *tflush;
+ size_t offset;
+
+ LIST_FOREACH_SAFE(flush, &dev->flush_list, list_entry, tflush) {
+ offset = batch->index;
+
+ if (spdk_bit_array_get(flush->bmap, offset)) {
+ spdk_bit_array_clear(flush->bmap, offset);
+ if (!(--flush->num_req)) {
+ ftl_complete_flush(flush);
+ }
+ }
+ }
+}
+
+static void
+ftl_nv_cache_wrap_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_nv_cache *nv_cache = cb_arg;
+
+ if (!success) {
+ SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
+ /* TODO: go into read-only mode */
+ assert(0);
+ }
+
+ pthread_spin_lock(&nv_cache->lock);
+ nv_cache->ready = true;
+ pthread_spin_unlock(&nv_cache->lock);
+
+ spdk_bdev_free_io(bdev_io);
+}
+
+static void
+ftl_nv_cache_wrap(void *ctx)
+{
+ struct ftl_nv_cache *nv_cache = ctx;
+ int rc;
+
+ rc = ftl_nv_cache_write_header(nv_cache, false, ftl_nv_cache_wrap_cb, nv_cache);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to write non-volatile cache metadata header: %s\n",
+ spdk_strerror(-rc));
+ /* TODO: go into read-only mode */
+ assert(0);
+ }
+}
+
+static uint64_t
+ftl_reserve_nv_cache(struct ftl_nv_cache *nv_cache, size_t *num_blocks, unsigned int *phase)
+{
+ struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
+ struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
+ uint64_t num_available, cache_size, cache_addr = FTL_LBA_INVALID;
+
+ cache_size = spdk_bdev_get_num_blocks(bdev);
+
+ pthread_spin_lock(&nv_cache->lock);
+ if (spdk_unlikely(nv_cache->num_available == 0 || !nv_cache->ready)) {
+ goto out;
+ }
+
+ num_available = spdk_min(nv_cache->num_available, *num_blocks);
+ num_available = spdk_min(num_available, dev->conf.nv_cache.max_request_cnt);
+
+ if (spdk_unlikely(nv_cache->current_addr + num_available > cache_size)) {
+ *num_blocks = cache_size - nv_cache->current_addr;
+ } else {
+ *num_blocks = num_available;
+ }
+
+ cache_addr = nv_cache->current_addr;
+ nv_cache->current_addr += *num_blocks;
+ nv_cache->num_available -= *num_blocks;
+ *phase = nv_cache->phase;
+
+ if (nv_cache->current_addr == spdk_bdev_get_num_blocks(bdev)) {
+ nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
+ nv_cache->phase = ftl_nv_cache_next_phase(nv_cache->phase);
+ nv_cache->ready = false;
+ spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_nv_cache_wrap, nv_cache);
+ }
+out:
+ pthread_spin_unlock(&nv_cache->lock);
+ return cache_addr;
+}
+
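+/*
+ * Editor's illustration of a wrap (block numbers are hypothetical): with a
+ * 1024-block cache bdev, current_addr == 1020 and a request for 16 blocks (with
+ * enough blocks available), the reservation above is clipped to 4 blocks.
+ * current_addr then equals the bdev size, so the write pointer wraps back to
+ * FTL_NV_CACHE_DATA_OFFSET, the phase advances, the cache is marked not ready, and
+ * a header rewrite is scheduled on the core thread; writes may use the wrapped
+ * area again once the header update completes.
+ */
+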
+static struct ftl_io *
+ftl_alloc_io_nv_cache(struct ftl_io *parent, size_t num_blocks)
+{
+ struct ftl_io_init_opts opts = {
+ .dev = parent->dev,
+ .parent = parent,
+ .iovcnt = 0,
+ .num_blocks = num_blocks,
+ .flags = parent->flags | FTL_IO_CACHE,
+ };
+
+ return ftl_io_init_internal(&opts);
+}
+
+static void
+ftl_nv_cache_submit_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_io *io = cb_arg;
+ struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;
+
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Non-volatile cache write failed at %"PRIx64"\n", io->addr.offset);
+ io->status = -EIO;
+ }
+
+ ftl_io_dec_req(io);
+ if (ftl_io_done(io)) {
+ spdk_mempool_put(nv_cache->md_pool, io->md);
+ ftl_io_complete(io);
+ }
+
+ spdk_bdev_free_io(bdev_io);
+}
+
+static void
+ftl_submit_nv_cache(void *ctx)
+{
+ struct ftl_io *io = ctx;
+ struct spdk_ftl_dev *dev = io->dev;
+ struct spdk_thread *thread;
+ struct ftl_nv_cache *nv_cache = &dev->nv_cache;
+ struct ftl_io_channel *ioch;
+ int rc;
+
+ ioch = ftl_io_channel_get_ctx(io->ioch);
+ thread = spdk_io_channel_get_thread(io->ioch);
+
+ rc = spdk_bdev_write_blocks_with_md(nv_cache->bdev_desc, ioch->cache_ioch,
+ ftl_io_iovec_addr(io), io->md, io->addr.offset,
+ io->num_blocks, ftl_nv_cache_submit_cb, io);
+ if (rc == -ENOMEM) {
+ spdk_thread_send_msg(thread, ftl_submit_nv_cache, io);
+ return;
+ } else if (rc) {
+ SPDK_ERRLOG("Write to persistent cache failed: %s (%"PRIu64", %"PRIu64")\n",
+ spdk_strerror(-rc), io->addr.offset, io->num_blocks);
+ spdk_mempool_put(nv_cache->md_pool, io->md);
+ io->status = -EIO;
+ ftl_io_complete(io);
+ return;
+ }
+
+ ftl_io_advance(io, io->num_blocks);
+ ftl_io_inc_req(io);
+}
+
+static void
+ftl_nv_cache_fill_md(struct ftl_io *io, unsigned int phase)
+{
+ struct spdk_bdev *bdev;
+ struct ftl_nv_cache *nv_cache = &io->dev->nv_cache;
+ uint64_t block_off, lba;
+ void *md_buf = io->md;
+
+ bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
+
+ for (block_off = 0; block_off < io->num_blocks; ++block_off) {
+ lba = ftl_nv_cache_pack_lba(ftl_io_get_lba(io, block_off), phase);
+ memcpy(md_buf, &lba, sizeof(lba));
+ md_buf += spdk_bdev_get_md_size(bdev);
+ }
+}
+
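+/*
+ * Editor's note on the layout produced above: each cached block gets the first
+ * sizeof(uint64_t) bytes of its per-block metadata filled with its LBA packed
+ * together with the current phase; consecutive entries are spaced by the cache
+ * bdev's metadata size (e.g. with 16-byte metadata, block i's packed LBA starts at
+ * byte offset i * 16 of io->md).
+ */
+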
+static void
+_ftl_write_nv_cache(void *ctx)
+{
+ struct ftl_io *child, *io = ctx;
+ struct spdk_ftl_dev *dev = io->dev;
+ struct spdk_thread *thread;
+ unsigned int phase;
+ uint64_t num_blocks;
+
+ thread = spdk_io_channel_get_thread(io->ioch);
+
+ while (io->pos < io->num_blocks) {
+ num_blocks = ftl_io_iovec_len_left(io);
+
+ child = ftl_alloc_io_nv_cache(io, num_blocks);
+ if (spdk_unlikely(!child)) {
+ spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
+ return;
+ }
+
+ child->md = spdk_mempool_get(dev->nv_cache.md_pool);
+ if (spdk_unlikely(!child->md)) {
+ ftl_io_free(child);
+ spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
+ break;
+ }
+
+ /* Reserve area on the write buffer cache */
+ child->addr.offset = ftl_reserve_nv_cache(&dev->nv_cache, &num_blocks, &phase);
+ if (child->addr.offset == FTL_LBA_INVALID) {
+ spdk_mempool_put(dev->nv_cache.md_pool, child->md);
+ ftl_io_free(child);
+ spdk_thread_send_msg(thread, _ftl_write_nv_cache, io);
+ break;
+ }
+
+ /* Shrink the IO if there isn't enough room in the cache to fill the whole iovec */
+ if (spdk_unlikely(num_blocks != ftl_io_iovec_len_left(io))) {
+ ftl_io_shrink_iovec(child, num_blocks);
+ }
+
+ ftl_nv_cache_fill_md(child, phase);
+ ftl_submit_nv_cache(child);
+ }
+
+ if (ftl_io_done(io)) {
+ ftl_io_complete(io);
+ }
+}
+
+static void
+ftl_write_nv_cache(struct ftl_io *parent)
+{
+ ftl_io_reset(parent);
+ parent->flags |= FTL_IO_CACHE;
+ _ftl_write_nv_cache(parent);
+}
+
+int
+ftl_nv_cache_write_header(struct ftl_nv_cache *nv_cache, bool shutdown,
+ spdk_bdev_io_completion_cb cb_fn, void *cb_arg)
+{
+ struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
+ struct ftl_nv_cache_header *hdr = nv_cache->dma_buf;
+ struct spdk_bdev *bdev;
+ struct ftl_io_channel *ioch;
+
+ bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
+ ioch = ftl_io_channel_get_ctx(ftl_get_io_channel(dev));
+
+ memset(hdr, 0, spdk_bdev_get_block_size(bdev));
+
+ hdr->phase = (uint8_t)nv_cache->phase;
+ hdr->size = spdk_bdev_get_num_blocks(bdev);
+ hdr->uuid = dev->uuid;
+ hdr->version = FTL_NV_CACHE_HEADER_VERSION;
+ hdr->current_addr = shutdown ? nv_cache->current_addr : FTL_LBA_INVALID;
+ hdr->checksum = spdk_crc32c_update(hdr, offsetof(struct ftl_nv_cache_header, checksum), 0);
+
+ return spdk_bdev_write_blocks(nv_cache->bdev_desc, ioch->cache_ioch, hdr, 0, 1,
+ cb_fn, cb_arg);
+}
+
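+/*
+ * Editor's sketch (not part of the original sources): a reader restoring the cache
+ * can validate the header written above by recomputing the same crc32c over the
+ * bytes preceding the checksum field, e.g.:
+ *
+ *	uint32_t crc = spdk_crc32c_update(hdr, offsetof(struct ftl_nv_cache_header,
+ *					  checksum), 0);
+ *	bool valid = (crc == hdr->checksum);
+ *
+ * together with a version and UUID match against the owning device.
+ */
+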
+int
+ftl_nv_cache_scrub(struct ftl_nv_cache *nv_cache, spdk_bdev_io_completion_cb cb_fn, void *cb_arg)
+{
+ struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
+ struct ftl_io_channel *ioch;
+ struct spdk_bdev *bdev;
+
+ ioch = ftl_io_channel_get_ctx(ftl_get_io_channel(dev));
+ bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
+
+ return spdk_bdev_write_zeroes_blocks(nv_cache->bdev_desc, ioch->cache_ioch, 1,
+ spdk_bdev_get_num_blocks(bdev) - 1,
+ cb_fn, cb_arg);
+}
+
+static void
+ftl_write_fail(struct ftl_io *io, int status)
+{
+ struct ftl_batch *batch = io->batch;
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_wbuf_entry *entry;
+ struct ftl_band *band;
+ char buf[128];
+
+ entry = TAILQ_FIRST(&batch->entries);
+
+ band = ftl_band_from_addr(io->dev, entry->addr);
+ SPDK_ERRLOG("Write failed @addr: %s, status: %d\n",
+ ftl_addr2str(entry->addr, buf, sizeof(buf)), status);
+
+	/* Close the band, and halt the wptr and defrag */
+ ftl_halt_writes(dev, band);
+
+ TAILQ_FOREACH(entry, &batch->entries, tailq) {
+ /* Invalidate meta set by process_writes() */
+ ftl_invalidate_addr(dev, entry->addr);
+ }
+
+ /* Reset the batch back to the write buffer to resend it later */
+ TAILQ_INSERT_TAIL(&dev->pending_batches, batch, tailq);
+}
+
+static void
+ftl_write_cb(struct ftl_io *io, void *arg, int status)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_batch *batch = io->batch;
+ struct ftl_wbuf_entry *entry;
+ struct ftl_band *band;
+ struct ftl_addr prev_addr, addr = io->addr;
+
+ if (status) {
+ ftl_write_fail(io, status);
+ return;
+ }
+
+ assert(io->num_blocks == dev->xfer_size);
+ assert(!(io->flags & FTL_IO_MD));
+
+ TAILQ_FOREACH(entry, &batch->entries, tailq) {
+ band = entry->band;
+ if (!(entry->io_flags & FTL_IO_PAD)) {
+ /* Verify that the LBA is set for user blocks */
+ assert(entry->lba != FTL_LBA_INVALID);
+ }
+
+ if (band != NULL) {
+ assert(band->num_reloc_blocks > 0);
+ band->num_reloc_blocks--;
+ }
+
+ entry->addr = addr;
+ if (entry->lba != FTL_LBA_INVALID) {
+ pthread_spin_lock(&entry->lock);
+ prev_addr = ftl_l2p_get(dev, entry->lba);
+
+ /* If the l2p was updated in the meantime, don't update band's metadata */
+ if (ftl_addr_cached(prev_addr) &&
+ entry == ftl_get_entry_from_addr(dev, prev_addr)) {
+ /* Setting entry's cache bit needs to be done after metadata */
+ /* within the band is updated to make sure that writes */
+ /* invalidating the entry clear the metadata as well */
+ ftl_band_set_addr(io->band, entry->lba, entry->addr);
+ entry->valid = true;
+ }
+ pthread_spin_unlock(&entry->lock);
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write addr:%lu, lba:%lu\n",
+ entry->addr.offset, entry->lba);
+
+ addr = ftl_band_next_addr(io->band, addr, 1);
+ }
+
+ ftl_process_flush(dev, batch);
+ ftl_release_batch(dev, batch);
+}
+
+static void
+ftl_update_stats(struct spdk_ftl_dev *dev, const struct ftl_wbuf_entry *entry)
+{
+ if (!(entry->io_flags & FTL_IO_INTERNAL)) {
+ dev->stats.write_user++;
+ }
+ dev->stats.write_total++;
+}
+
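+/*
+ * Editor's summary of the cases handled below: (1) no previous mapping - simply set
+ * the L2P; (2) the previous mapping points at a write buffer entry - re-check the
+ * L2P under that entry's lock and, unless this is a weak (relocation) write that
+ * lost the race, invalidate the old entry and update the L2P; (3) the previous
+ * mapping points at the base device - invalidate the old physical address under the
+ * band's lba_map lock and update the L2P unless a weak write has already been
+ * superseded.
+ */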
+static void
+ftl_update_l2p(struct spdk_ftl_dev *dev, const struct ftl_wbuf_entry *entry,
+ struct ftl_addr addr)
+{
+ struct ftl_addr prev_addr;
+ struct ftl_wbuf_entry *prev;
+ struct ftl_band *band;
+ int valid;
+ bool io_weak = entry->io_flags & FTL_IO_WEAK;
+
+ prev_addr = ftl_l2p_get(dev, entry->lba);
+ if (ftl_addr_invalid(prev_addr)) {
+ ftl_l2p_set(dev, entry->lba, addr);
+ return;
+ }
+
+ if (ftl_addr_cached(prev_addr)) {
+ prev = ftl_get_entry_from_addr(dev, prev_addr);
+ pthread_spin_lock(&prev->lock);
+
+ /* Re-read the L2P under the lock to protect against updates */
+ /* to this LBA from other threads */
+ prev_addr = ftl_l2p_get(dev, entry->lba);
+
+ /* If the entry is no longer in cache, another write has been */
+ /* scheduled in the meantime, so we can return to evicted path */
+ if (!ftl_addr_cached(prev_addr)) {
+ pthread_spin_unlock(&prev->lock);
+ goto evicted;
+ }
+
+ /*
+		 * The relocated block could still reside in the cache because the write buffers
+		 * are independent for each IO channel and a sufficient amount of data (write
+		 * unit size) must be collected before it is submitted to the lower layer.
+		 * If the previous entry wasn't overwritten, invalidate the old address and the
+		 * entry. Otherwise skip relocating the block.
+ */
+ if (io_weak &&
+		    /* Check if prev_addr was updated in the meantime */
+ !(ftl_addr_cmp(prev_addr, ftl_get_addr_from_entry(prev)) &&
+		      /* Check if the relocated address is the same as in the previous entry */
+ ftl_addr_cmp(prev->addr, entry->addr))) {
+ pthread_spin_unlock(&prev->lock);
+ return;
+ }
+
+ /*
+		 * If the previous entry is part of the cache and was written to disk,
+		 * remove and invalidate it
+ */
+ if (prev->valid) {
+ ftl_invalidate_addr(dev, prev->addr);
+ prev->valid = false;
+ }
+
+ ftl_l2p_set(dev, entry->lba, addr);
+ pthread_spin_unlock(&prev->lock);
+ return;
+ }
+
+evicted:
+ /*
+ * If the L2P's physical address is different than what we expected we don't need to
+ * do anything (someone's already overwritten our data).
+ */
+ if (io_weak && !ftl_addr_cmp(prev_addr, entry->addr)) {
+ return;
+ }
+
+	/* Lock the band containing the previous physical address. This ensures atomic */
+	/* changes to the L2P as well as to the metadata. The valid bits in the metadata */
+	/* are used to check the validity of weak writes. */
+ band = ftl_band_from_addr(dev, prev_addr);
+ pthread_spin_lock(&band->lba_map.lock);
+
+ valid = ftl_invalidate_addr_unlocked(dev, prev_addr);
+
+ /* If the address has been invalidated already, we don't want to update */
+ /* the L2P for weak writes, as it means the write is no longer valid. */
+ if (!io_weak || valid) {
+ ftl_l2p_set(dev, entry->lba, addr);
+ }
+
+ pthread_spin_unlock(&band->lba_map.lock);
+}
+
+static struct ftl_io *
+ftl_io_init_child_write(struct ftl_io *parent, struct ftl_addr addr, ftl_io_fn cb)
+{
+ struct ftl_io *io;
+ struct spdk_ftl_dev *dev = parent->dev;
+ struct ftl_io_init_opts opts = {
+ .dev = dev,
+ .io = NULL,
+ .parent = parent,
+ .band = parent->band,
+ .size = sizeof(struct ftl_io),
+ .flags = 0,
+ .type = parent->type,
+ .num_blocks = dev->xfer_size,
+ .cb_fn = cb,
+ .iovcnt = 0,
+ };
+
+ io = ftl_io_init_internal(&opts);
+ if (!io) {
+ return NULL;
+ }
+
+ io->addr = addr;
+
+ return io;
+}
+
+static void
+ftl_io_child_write_cb(struct ftl_io *io, void *ctx, int status)
+{
+ struct ftl_zone *zone;
+ struct ftl_wptr *wptr;
+
+ zone = ftl_band_zone_from_addr(io->band, io->addr);
+ wptr = ftl_wptr_from_band(io->band);
+
+ zone->busy = false;
+ zone->info.write_pointer += io->num_blocks;
+
+ if (zone->info.write_pointer == zone->info.zone_id + zone->info.capacity) {
+ zone->info.state = SPDK_BDEV_ZONE_STATE_FULL;
+ }
+
+	/* If some other write on the same band failed, the write pointer would already be freed */
+ if (spdk_likely(wptr)) {
+ wptr->num_outstanding--;
+ }
+}
+
+static int
+ftl_submit_child_write(struct ftl_wptr *wptr, struct ftl_io *io)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_io_channel *ioch;
+ struct ftl_io *child;
+ struct ftl_addr addr;
+ int rc;
+
+ ioch = ftl_io_channel_get_ctx(io->ioch);
+
+ if (spdk_likely(!wptr->direct_mode)) {
+ addr = wptr->addr;
+ } else {
+ assert(io->flags & FTL_IO_DIRECT_ACCESS);
+ assert(ftl_addr_get_band(dev, io->addr) == wptr->band->id);
+ addr = io->addr;
+ }
+
+	/* Split the IO into child requests and release the zone immediately after each child completes */
+ child = ftl_io_init_child_write(io, addr, ftl_io_child_write_cb);
+ if (!child) {
+ return -EAGAIN;
+ }
+
+ wptr->num_outstanding++;
+
+ if (ftl_is_append_supported(dev)) {
+ rc = spdk_bdev_zone_appendv(dev->base_bdev_desc, ioch->base_ioch,
+ child->iov, child->iov_cnt,
+ ftl_addr_get_zone_slba(dev, addr),
+ dev->xfer_size, ftl_io_cmpl_cb, child);
+ } else {
+ rc = spdk_bdev_writev_blocks(dev->base_bdev_desc, ioch->base_ioch,
+ child->iov, child->iov_cnt, addr.offset,
+ dev->xfer_size, ftl_io_cmpl_cb, child);
+ }
+
+ if (rc) {
+ wptr->num_outstanding--;
+ ftl_io_fail(child, rc);
+ ftl_io_complete(child);
+		SPDK_ERRLOG("Write to base bdev failed with status:%d, addr:%lu\n",
+ rc, addr.offset);
+ return -EIO;
+ }
+
+ ftl_io_inc_req(child);
+ ftl_io_advance(child, dev->xfer_size);
+
+ return 0;
+}
+
+static int
+ftl_submit_write(struct ftl_wptr *wptr, struct ftl_io *io)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ int rc = 0;
+
+ assert(io->num_blocks % dev->xfer_size == 0);
+
+ while (io->iov_pos < io->iov_cnt) {
+		/* There are no ordering guarantees for completions within an NVMe IO submission */
+		/* queue, so wait until the zone is not busy before submitting another write */
+ if (!ftl_is_append_supported(dev) && wptr->zone->busy) {
+ TAILQ_INSERT_TAIL(&wptr->pending_queue, io, ioch_entry);
+ rc = -EAGAIN;
+ break;
+ }
+
+ rc = ftl_submit_child_write(wptr, io);
+ if (spdk_unlikely(rc)) {
+ if (rc == -EAGAIN) {
+ TAILQ_INSERT_TAIL(&wptr->pending_queue, io, ioch_entry);
+ } else {
+ ftl_io_fail(io, rc);
+ }
+ break;
+ }
+
+ ftl_trace_submission(dev, io, wptr->addr, dev->xfer_size);
+ ftl_wptr_advance(wptr, dev->xfer_size);
+ }
+
+ if (ftl_io_done(io)) {
+ /* Parent IO will complete after all children are completed */
+ ftl_io_complete(io);
+ }
+
+ return rc;
+}
+
+static void
+ftl_flush_pad_batch(struct spdk_ftl_dev *dev)
+{
+ struct ftl_batch *batch = dev->current_batch;
+ struct ftl_io_channel *ioch;
+ size_t size = 0, num_entries = 0;
+
+ assert(batch != NULL);
+ assert(batch->num_entries < dev->xfer_size);
+
+ TAILQ_FOREACH(ioch, &dev->ioch_queue, tailq) {
+ size += spdk_ring_count(ioch->submit_queue);
+ }
+
+ num_entries = dev->xfer_size - batch->num_entries;
+ if (size < num_entries) {
+ ftl_pad_wbuf(dev, num_entries - size);
+ }
+}
+
+static bool
+ftl_check_io_channel_flush(struct spdk_ftl_dev *dev)
+{
+ struct ftl_io_channel *ioch;
+
+ TAILQ_FOREACH(ioch, &dev->ioch_queue, tailq) {
+ if (ioch->flush && spdk_ring_count(ioch->free_queue) != ioch->num_entries) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int
+ftl_wptr_process_writes(struct ftl_wptr *wptr)
+{
+ struct spdk_ftl_dev *dev = wptr->dev;
+ struct ftl_batch *batch;
+ struct ftl_wbuf_entry *entry;
+ struct ftl_io *io;
+
+ if (spdk_unlikely(!TAILQ_EMPTY(&wptr->pending_queue))) {
+ io = TAILQ_FIRST(&wptr->pending_queue);
+ TAILQ_REMOVE(&wptr->pending_queue, io, ioch_entry);
+
+ if (ftl_submit_write(wptr, io) == -EAGAIN) {
+ return 0;
+ }
+ }
+
+ /* Make sure the band is prepared for writing */
+ if (!ftl_wptr_ready(wptr)) {
+ return 0;
+ }
+
+ if (dev->halt) {
+ ftl_wptr_process_shutdown(wptr);
+ }
+
+ if (spdk_unlikely(wptr->flush)) {
+ ftl_wptr_pad_band(wptr);
+ }
+
+ batch = ftl_get_next_batch(dev);
+ if (!batch) {
+ /* If there are queued flush requests we need to pad the write buffer to */
+ /* force out remaining entries */
+ if (!LIST_EMPTY(&dev->flush_list) || ftl_check_io_channel_flush(dev)) {
+ ftl_flush_pad_batch(dev);
+ }
+
+ return 0;
+ }
+
+ io = ftl_io_wbuf_init(dev, wptr->addr, wptr->band, batch, ftl_write_cb);
+ if (!io) {
+ goto error;
+ }
+
+ TAILQ_FOREACH(entry, &batch->entries, tailq) {
+ /* Update band's relocation stats if the IO comes from reloc */
+ if (entry->io_flags & FTL_IO_WEAK) {
+ if (!spdk_bit_array_get(wptr->band->reloc_bitmap, entry->band->id)) {
+ spdk_bit_array_set(wptr->band->reloc_bitmap, entry->band->id);
+ entry->band->num_reloc_bands++;
+ }
+ }
+
+ ftl_trace_wbuf_pop(dev, entry);
+ ftl_update_stats(dev, entry);
+ }
+
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_CORE, "Write addr:%lx\n", wptr->addr.offset);
+
+ if (ftl_submit_write(wptr, io)) {
+ /* TODO: we need some recovery here */
+ assert(0 && "Write submit failed");
+ if (ftl_io_done(io)) {
+ ftl_io_free(io);
+ }
+ }
+
+ return dev->xfer_size;
+error:
+ TAILQ_INSERT_TAIL(&dev->pending_batches, batch, tailq);
+ return 0;
+}
+
+static int
+ftl_process_writes(struct spdk_ftl_dev *dev)
+{
+ struct ftl_wptr *wptr, *twptr;
+ size_t num_active = 0;
+ enum ftl_band_state state;
+
+ LIST_FOREACH_SAFE(wptr, &dev->wptr_list, list_entry, twptr) {
+ ftl_wptr_process_writes(wptr);
+ state = wptr->band->state;
+
+ if (state != FTL_BAND_STATE_FULL &&
+ state != FTL_BAND_STATE_CLOSING &&
+ state != FTL_BAND_STATE_CLOSED) {
+ num_active++;
+ }
+ }
+
+ if (num_active < 1) {
+ ftl_add_wptr(dev);
+ }
+
+ return 0;
+}
+
+static void
+ftl_fill_wbuf_entry(struct ftl_wbuf_entry *entry, struct ftl_io *io)
+{
+ memcpy(entry->payload, ftl_io_iovec_addr(io), FTL_BLOCK_SIZE);
+
+ if (entry->io_flags & FTL_IO_WEAK) {
+ entry->band = ftl_band_from_addr(io->dev, io->addr);
+ entry->addr = ftl_band_next_addr(entry->band, io->addr, io->pos);
+ entry->band->num_reloc_blocks++;
+ }
+
+ entry->trace = io->trace;
+ entry->lba = ftl_io_current_lba(io);
+}
+
+static int
+ftl_wbuf_fill(struct ftl_io *io)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_io_channel *ioch;
+ struct ftl_wbuf_entry *entry;
+
+ ioch = ftl_io_channel_get_ctx(io->ioch);
+
+ while (io->pos < io->num_blocks) {
+ if (ftl_io_current_lba(io) == FTL_LBA_INVALID) {
+ ftl_io_advance(io, 1);
+ continue;
+ }
+
+ entry = ftl_acquire_wbuf_entry(ioch, io->flags);
+ if (!entry) {
+ TAILQ_INSERT_TAIL(&ioch->retry_queue, io, ioch_entry);
+ return 0;
+ }
+
+ ftl_fill_wbuf_entry(entry, io);
+
+ ftl_trace_wbuf_fill(dev, io);
+ ftl_update_l2p(dev, entry, ftl_get_addr_from_entry(entry));
+ ftl_io_advance(io, 1);
+
+		/* Needs to be done after the L2P is updated to avoid a race with the */
+		/* write completion callback in case it's processed faster than the */
+		/* L2P is set in ftl_update_l2p(). */
+ spdk_ring_enqueue(ioch->submit_queue, (void **)&entry, 1, NULL);
+ }
+
+ if (ftl_io_done(io)) {
+ if (ftl_dev_has_nv_cache(dev) && !(io->flags & FTL_IO_BYPASS_CACHE)) {
+ ftl_write_nv_cache(io);
+ } else {
+ TAILQ_INSERT_TAIL(&ioch->write_cmpl_queue, io, ioch_entry);
+ }
+ }
+
+ return 0;
+}
+
+static bool
+ftl_dev_needs_defrag(struct spdk_ftl_dev *dev)
+{
+ const struct spdk_ftl_limit *limit = ftl_get_limit(dev, SPDK_FTL_LIMIT_START);
+
+ if (ftl_reloc_is_halted(dev->reloc)) {
+ return false;
+ }
+
+ if (ftl_reloc_is_defrag_active(dev->reloc)) {
+ return false;
+ }
+
+ if (dev->num_free <= limit->thld) {
+ return true;
+ }
+
+ return false;
+}
+
+static double
+ftl_band_calc_merit(struct ftl_band *band, size_t *threshold_valid)
+{
+ size_t usable, valid, invalid;
+ double vld_ratio;
+
+ /* If the band doesn't have any usable blocks it's of no use */
+ usable = ftl_band_num_usable_blocks(band);
+ if (usable == 0) {
+ return 0.0;
+ }
+
+ valid = threshold_valid ? (usable - *threshold_valid) : band->lba_map.num_vld;
+ invalid = usable - valid;
+
+ /* Add one to avoid division by 0 */
+ vld_ratio = (double)invalid / (double)(valid + 1);
+ return vld_ratio * ftl_band_age(band);
+}
+
+static bool
+ftl_band_needs_defrag(struct ftl_band *band, struct spdk_ftl_dev *dev)
+{
+ struct spdk_ftl_conf *conf = &dev->conf;
+ size_t thld_vld;
+
+ /* If we're in dire need of free bands, every band is worth defragging */
+ if (ftl_current_limit(dev) == SPDK_FTL_LIMIT_CRIT) {
+ return true;
+ }
+
+ thld_vld = (ftl_band_num_usable_blocks(band) * conf->invalid_thld) / 100;
+
+ return band->merit > ftl_band_calc_merit(band, &thld_vld);
+}
+
+static struct ftl_band *
+ftl_select_defrag_band(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band, *mband = NULL;
+ double merit = 0;
+
+ LIST_FOREACH(band, &dev->shut_bands, list_entry) {
+ assert(band->state == FTL_BAND_STATE_CLOSED);
+ band->merit = ftl_band_calc_merit(band, NULL);
+ if (band->merit > merit) {
+ merit = band->merit;
+ mband = band;
+ }
+ }
+
+ if (mband && !ftl_band_needs_defrag(mband, dev)) {
+ mband = NULL;
+ }
+
+ return mband;
+}
+
+static void
+ftl_process_relocs(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band;
+
+ if (ftl_dev_needs_defrag(dev)) {
+ band = ftl_select_defrag_band(dev);
+ if (band) {
+ ftl_reloc_add(dev->reloc, band, 0, ftl_get_num_blocks_in_band(dev), 0, true);
+ ftl_trace_defrag_band(dev, band);
+ }
+ }
+
+ ftl_reloc(dev->reloc);
+}
+
+int
+ftl_current_limit(const struct spdk_ftl_dev *dev)
+{
+ return dev->limit;
+}
+
+void
+spdk_ftl_dev_get_attrs(const struct spdk_ftl_dev *dev, struct spdk_ftl_attrs *attrs)
+{
+ attrs->uuid = dev->uuid;
+ attrs->num_blocks = dev->num_lbas;
+ attrs->block_size = FTL_BLOCK_SIZE;
+ attrs->num_zones = ftl_get_num_zones(dev);
+ attrs->zone_size = ftl_get_num_blocks_in_zone(dev);
+ attrs->conf = dev->conf;
+ attrs->base_bdev = spdk_bdev_get_name(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
+
+ attrs->cache_bdev = NULL;
+ if (dev->nv_cache.bdev_desc) {
+ attrs->cache_bdev = spdk_bdev_get_name(
+ spdk_bdev_desc_get_bdev(dev->nv_cache.bdev_desc));
+ }
+}
+
+static void
+_ftl_io_write(void *ctx)
+{
+ ftl_io_write((struct ftl_io *)ctx);
+}
+
+static int
+ftl_submit_write_leaf(struct ftl_io *io)
+{
+ int rc;
+
+ rc = ftl_submit_write(ftl_wptr_from_band(io->band), io);
+ if (rc == -EAGAIN) {
+ /* EAGAIN means that the request was put on the pending queue */
+ return 0;
+ }
+
+ return rc;
+}
+
+void
+ftl_io_write(struct ftl_io *io)
+{
+ struct spdk_ftl_dev *dev = io->dev;
+ struct ftl_io_channel *ioch = ftl_io_channel_get_ctx(io->ioch);
+
+ /* Put the IO on retry queue in case IO channel is not initialized */
+ if (spdk_unlikely(ioch->index == FTL_IO_CHANNEL_INDEX_INVALID)) {
+ TAILQ_INSERT_TAIL(&ioch->retry_queue, io, ioch_entry);
+ return;
+ }
+
+ /* For normal IOs we just need to copy the data onto the write buffer */
+ if (!(io->flags & FTL_IO_MD)) {
+ ftl_io_call_foreach_child(io, ftl_wbuf_fill);
+ } else {
+		/* Metadata has its own buffer, so it doesn't have to be copied; just */
+		/* send it to the core thread and schedule the write immediately */
+ if (ftl_check_core_thread(dev)) {
+ ftl_io_call_foreach_child(io, ftl_submit_write_leaf);
+ } else {
+ spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_io_write, io);
+ }
+ }
+}
+
+int
+spdk_ftl_write(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
+ struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
+{
+ struct ftl_io *io;
+
+ if (iov_cnt == 0) {
+ return -EINVAL;
+ }
+
+ if (lba_cnt == 0) {
+ return -EINVAL;
+ }
+
+ if (lba_cnt != ftl_iovec_num_blocks(iov, iov_cnt)) {
+ return -EINVAL;
+ }
+
+ if (!dev->initialized) {
+ return -EBUSY;
+ }
+
+ io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_WRITE);
+ if (!io) {
+ return -ENOMEM;
+ }
+
+ ftl_io_write(io);
+
+ return 0;
+}
+
+void
+ftl_io_read(struct ftl_io *io)
+{
+ ftl_io_call_foreach_child(io, ftl_submit_read);
+}
+
+int
+spdk_ftl_read(struct spdk_ftl_dev *dev, struct spdk_io_channel *ch, uint64_t lba, size_t lba_cnt,
+ struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_arg)
+{
+ struct ftl_io *io;
+
+ if (iov_cnt == 0) {
+ return -EINVAL;
+ }
+
+ if (lba_cnt == 0) {
+ return -EINVAL;
+ }
+
+ if (lba_cnt != ftl_iovec_num_blocks(iov, iov_cnt)) {
+ return -EINVAL;
+ }
+
+ if (!dev->initialized) {
+ return -EBUSY;
+ }
+
+ io = ftl_io_user_init(ch, lba, lba_cnt, iov, iov_cnt, cb_fn, cb_arg, FTL_IO_READ);
+ if (!io) {
+ return -ENOMEM;
+ }
+
+ ftl_io_read(io);
+ return 0;
+}
+
+static struct ftl_flush *
+ftl_flush_init(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
+{
+ struct ftl_flush *flush;
+
+ flush = calloc(1, sizeof(*flush));
+ if (!flush) {
+ return NULL;
+ }
+
+ flush->bmap = spdk_bit_array_create(FTL_BATCH_COUNT);
+ if (!flush->bmap) {
+ goto error;
+ }
+
+ flush->dev = dev;
+ flush->cb.fn = cb_fn;
+ flush->cb.ctx = cb_arg;
+
+ return flush;
+error:
+ free(flush);
+ return NULL;
+}
+
+static void
+_ftl_flush(void *ctx)
+{
+ struct ftl_flush *flush = ctx;
+ struct spdk_ftl_dev *dev = flush->dev;
+ uint32_t i;
+
+ /* Attach flush object to all non-empty batches */
+ for (i = 0; i < FTL_BATCH_COUNT; ++i) {
+ if (dev->batch_array[i].num_entries > 0) {
+ spdk_bit_array_set(flush->bmap, i);
+ flush->num_req++;
+ }
+ }
+
+ LIST_INSERT_HEAD(&dev->flush_list, flush, list_entry);
+
+ /* If the write buffer was already empty, the flush can be completed right away */
+ if (!flush->num_req) {
+ ftl_complete_flush(flush);
+ }
+}
+
+int
+ftl_flush_wbuf(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
+{
+ struct ftl_flush *flush;
+
+ flush = ftl_flush_init(dev, cb_fn, cb_arg);
+ if (!flush) {
+ return -ENOMEM;
+ }
+
+ spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_flush, flush);
+ return 0;
+}
+
+int
+spdk_ftl_flush(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg)
+{
+ if (!dev->initialized) {
+ return -EBUSY;
+ }
+
+ return ftl_flush_wbuf(dev, cb_fn, cb_arg);
+}
+
+bool
+ftl_addr_is_written(struct ftl_band *band, struct ftl_addr addr)
+{
+ struct ftl_zone *zone = ftl_band_zone_from_addr(band, addr);
+
+ return addr.offset < zone->info.write_pointer;
+}
+
+static void ftl_process_media_event(struct spdk_ftl_dev *dev, struct spdk_bdev_media_event event);
+
+static void
+_ftl_process_media_event(void *ctx)
+{
+ struct ftl_media_event *event = ctx;
+ struct spdk_ftl_dev *dev = event->dev;
+
+ ftl_process_media_event(dev, event->event);
+ spdk_mempool_put(dev->media_events_pool, event);
+}
+
+static void
+ftl_process_media_event(struct spdk_ftl_dev *dev, struct spdk_bdev_media_event event)
+{
+ struct ftl_band *band;
+ struct ftl_addr addr = { .offset = event.offset };
+ size_t block_off;
+
+ if (!ftl_check_core_thread(dev)) {
+ struct ftl_media_event *media_event;
+
+ media_event = spdk_mempool_get(dev->media_events_pool);
+ if (!media_event) {
+			SPDK_ERRLOG("Media event lost due to lack of memory\n");
+ return;
+ }
+
+ media_event->dev = dev;
+ media_event->event = event;
+ spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_process_media_event,
+ media_event);
+ return;
+ }
+
+ band = ftl_band_from_addr(dev, addr);
+ block_off = ftl_band_block_offset_from_addr(band, addr);
+
+ ftl_reloc_add(dev->reloc, band, block_off, event.num_blocks, 0, false);
+}
+
+void
+ftl_get_media_events(struct spdk_ftl_dev *dev)
+{
+#define FTL_MAX_MEDIA_EVENTS 128
+ struct spdk_bdev_media_event events[FTL_MAX_MEDIA_EVENTS];
+ size_t num_events, i;
+
+ if (!dev->initialized) {
+ return;
+ }
+
+ do {
+ num_events = spdk_bdev_get_media_events(dev->base_bdev_desc,
+ events, FTL_MAX_MEDIA_EVENTS);
+
+ for (i = 0; i < num_events; ++i) {
+ ftl_process_media_event(dev, events[i]);
+ }
+
+ } while (num_events);
+}
+
+int
+ftl_io_channel_poll(void *arg)
+{
+ struct ftl_io_channel *ch = arg;
+ struct ftl_io *io;
+ TAILQ_HEAD(, ftl_io) retry_queue;
+
+ if (TAILQ_EMPTY(&ch->write_cmpl_queue) && TAILQ_EMPTY(&ch->retry_queue)) {
+ return SPDK_POLLER_IDLE;
+ }
+
+ while (!TAILQ_EMPTY(&ch->write_cmpl_queue)) {
+ io = TAILQ_FIRST(&ch->write_cmpl_queue);
+ TAILQ_REMOVE(&ch->write_cmpl_queue, io, ioch_entry);
+ ftl_io_complete(io);
+ }
+
+ /*
+	 * Create a local copy of the retry queue to prevent infinite retrying in case an
+	 * IO gets inserted back into the retry queue
+ */
+ TAILQ_INIT(&retry_queue);
+ TAILQ_SWAP(&ch->retry_queue, &retry_queue, ftl_io, ioch_entry);
+
+ while (!TAILQ_EMPTY(&retry_queue)) {
+ io = TAILQ_FIRST(&retry_queue);
+ TAILQ_REMOVE(&retry_queue, io, ioch_entry);
+ if (io->type == FTL_IO_WRITE) {
+ ftl_io_write(io);
+ } else {
+ ftl_io_read(io);
+ }
+ }
+
+ return SPDK_POLLER_BUSY;
+}
+
+int
+ftl_task_core(void *ctx)
+{
+ struct spdk_ftl_dev *dev = ctx;
+
+ if (dev->halt) {
+ if (ftl_shutdown_complete(dev)) {
+ spdk_poller_unregister(&dev->core_poller);
+ return SPDK_POLLER_IDLE;
+ }
+ }
+
+ ftl_process_writes(dev);
+ ftl_process_relocs(dev);
+
+ return SPDK_POLLER_BUSY;
+}
+
+SPDK_LOG_REGISTER_COMPONENT("ftl_core", SPDK_LOG_FTL_CORE)
diff --git a/src/spdk/lib/ftl/ftl_core.h b/src/spdk/lib/ftl/ftl_core.h
new file mode 100644
index 000000000..b782ba731
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_core.h
@@ -0,0 +1,552 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FTL_CORE_H
+#define FTL_CORE_H
+
+#include "spdk/stdinc.h"
+#include "spdk/uuid.h"
+#include "spdk/thread.h"
+#include "spdk/util.h"
+#include "spdk_internal/log.h"
+#include "spdk/likely.h"
+#include "spdk/queue.h"
+#include "spdk/ftl.h"
+#include "spdk/bdev.h"
+#include "spdk/bdev_zone.h"
+
+#include "ftl_addr.h"
+#include "ftl_io.h"
+#include "ftl_trace.h"
+
+#ifdef SPDK_CONFIG_PMDK
+#include "libpmem.h"
+#endif /* SPDK_CONFIG_PMDK */
+
+struct spdk_ftl_dev;
+struct ftl_band;
+struct ftl_zone;
+struct ftl_io;
+struct ftl_restore;
+struct ftl_wptr;
+struct ftl_flush;
+struct ftl_reloc;
+struct ftl_anm_event;
+struct ftl_band_flush;
+
+struct ftl_stats {
+ /* Number of writes scheduled directly by the user */
+ uint64_t write_user;
+
+ /* Total number of writes */
+ uint64_t write_total;
+
+ /* Traces */
+ struct ftl_trace trace;
+
+ /* Number of limits applied */
+ uint64_t limits[SPDK_FTL_LIMIT_MAX];
+};
+
+struct ftl_global_md {
+ /* Device instance */
+ struct spdk_uuid uuid;
+ /* Size of the l2p table */
+ uint64_t num_lbas;
+};
+
+struct ftl_nv_cache {
+ /* Write buffer cache bdev */
+ struct spdk_bdev_desc *bdev_desc;
+ /* Write pointer */
+ uint64_t current_addr;
+ /* Number of available blocks left */
+ uint64_t num_available;
+ /* Maximum number of blocks */
+ uint64_t num_data_blocks;
+ /*
+	 * Phase of the current cycle of writes. Each time the whole cache area is filled,
+	 * the phase is advanced. The current phase is saved in every IO's metadata, as well
+	 * as in the header saved
+ * in the first sector. By looking at the phase of each block, it's possible to find the
+ * oldest block and replay the order of the writes when recovering the data from the cache.
+ */
+ unsigned int phase;
+ /* Indicates that the data can be written to the cache */
+ bool ready;
+ /* Metadata pool */
+ struct spdk_mempool *md_pool;
+ /* DMA buffer for writing the header */
+ void *dma_buf;
+ /* Cache lock */
+ pthread_spinlock_t lock;
+};
+
+struct ftl_batch {
+ /* Queue of write buffer entries, can reach up to xfer_size entries */
+ TAILQ_HEAD(, ftl_wbuf_entry) entries;
+ /* Number of entries in the queue above */
+ uint32_t num_entries;
+ /* Index within spdk_ftl_dev.batch_array */
+ uint32_t index;
+ struct iovec *iov;
+ void *metadata;
+ TAILQ_ENTRY(ftl_batch) tailq;
+};
+
+struct spdk_ftl_dev {
+ /* Device instance */
+ struct spdk_uuid uuid;
+ /* Device name */
+ char *name;
+ /* Configuration */
+ struct spdk_ftl_conf conf;
+
+ /* Indicates the device is fully initialized */
+ int initialized;
+ /* Indicates the device is about to be stopped */
+ int halt;
+	/* Indicates the device is about to start stopping - used to handle multiple stop requests */
+ bool halt_started;
+
+ /* Underlying device */
+ struct spdk_bdev_desc *base_bdev_desc;
+
+ /* Non-volatile write buffer cache */
+ struct ftl_nv_cache nv_cache;
+
+ /* LBA map memory pool */
+ struct spdk_mempool *lba_pool;
+
+ /* LBA map requests pool */
+ struct spdk_mempool *lba_request_pool;
+
+ /* Media management events pool */
+ struct spdk_mempool *media_events_pool;
+
+ /* Statistics */
+ struct ftl_stats stats;
+
+ /* Current sequence number */
+ uint64_t seq;
+
+ /* Array of bands */
+ struct ftl_band *bands;
+ /* Number of operational bands */
+ size_t num_bands;
+ /* Next write band */
+ struct ftl_band *next_band;
+ /* Free band list */
+ LIST_HEAD(, ftl_band) free_bands;
+ /* Closed bands list */
+ LIST_HEAD(, ftl_band) shut_bands;
+ /* Number of free bands */
+ size_t num_free;
+
+ /* List of write pointers */
+ LIST_HEAD(, ftl_wptr) wptr_list;
+
+ /* Logical -> physical table */
+ void *l2p;
+ /* Size of the l2p table */
+ uint64_t num_lbas;
+ /* Size of pages mmapped for l2p, valid only for mapping on persistent memory */
+ size_t l2p_pmem_len;
+
+ /* Address size */
+ size_t addr_len;
+
+ /* Flush list */
+ LIST_HEAD(, ftl_flush) flush_list;
+ /* List of band flush requests */
+ LIST_HEAD(, ftl_band_flush) band_flush_list;
+
+ /* Device specific md buffer */
+ struct ftl_global_md global_md;
+
+ /* Metadata size */
+ size_t md_size;
+ void *md_buf;
+
+ /* Transfer unit size */
+ size_t xfer_size;
+
+ /* Current user write limit */
+ int limit;
+
+ /* Inflight IO operations */
+ uint32_t num_inflight;
+
+ /* Manages data relocation */
+ struct ftl_reloc *reloc;
+
+ /* Thread on which the poller is running */
+ struct spdk_thread *core_thread;
+ /* IO channel */
+ struct spdk_io_channel *ioch;
+ /* Poller */
+ struct spdk_poller *core_poller;
+
+	/* The IO channel array provides a means of retrieving write buffer entries
+	 * from their address stored in the L2P. The address is divided into two
+	 * parts - an IO channel offset pointing at a specific IO channel (within
+	 * this array) and an entry offset pointing at a specific entry within that
+	 * IO channel (see the illustrative sketch following this struct).
+ */
+ struct ftl_io_channel **ioch_array;
+ TAILQ_HEAD(, ftl_io_channel) ioch_queue;
+ uint64_t num_io_channels;
+ /* Value required to shift address of a write buffer entry to retrieve
+ * the IO channel it's part of. The other part of the address describes
+ * the offset of an entry within the IO channel's entry array.
+ */
+ uint64_t ioch_shift;
+
+ /* Write buffer batches */
+#define FTL_BATCH_COUNT 4096
+ struct ftl_batch batch_array[FTL_BATCH_COUNT];
+ /* Iovec buffer used by batches */
+ struct iovec *iov_buf;
+ /* Batch currently being filled */
+ struct ftl_batch *current_batch;
+ /* Full and ready to be sent batches. A batch is put on this queue in
+ * case it's already filled, but cannot be sent.
+ */
+ TAILQ_HEAD(, ftl_batch) pending_batches;
+ TAILQ_HEAD(, ftl_batch) free_batches;
+
+ /* Devices' list */
+ STAILQ_ENTRY(spdk_ftl_dev) stailq;
+};
+
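+/*
+ * Editor's sketch (hypothetical helper, not part of the original sources)
+ * illustrating the write buffer address split described in struct spdk_ftl_dev:
+ * the upper bits of a cached address select the IO channel, the lower ioch_shift
+ * bits select the entry within that channel. The actual entry lookup is performed
+ * by ftl_get_entry_from_addr().
+ */
+static inline struct ftl_io_channel *
+ftl_addr_example_get_ioch(const struct spdk_ftl_dev *dev, struct ftl_addr addr,
+			  uint64_t *entry_offset)
+{
+	/* Low bits: offset of the entry within the channel's entry array */
+	*entry_offset = addr.cache_offset & ((1ULL << dev->ioch_shift) - 1);
+
+	/* High bits: index into the device's IO channel array */
+	return dev->ioch_array[addr.cache_offset >> dev->ioch_shift];
+}
+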
+struct ftl_nv_cache_header {
+ /* Version of the header */
+ uint32_t version;
+ /* UUID of the FTL device */
+ struct spdk_uuid uuid;
+ /* Size of the non-volatile cache (in blocks) */
+ uint64_t size;
+ /* Contains the next address to be written after clean shutdown, invalid LBA otherwise */
+ uint64_t current_addr;
+ /* Current phase */
+ uint8_t phase;
+ /* Checksum of the header, needs to be last element */
+ uint32_t checksum;
+} __attribute__((packed));
+
+struct ftl_media_event {
+ /* Owner */
+ struct spdk_ftl_dev *dev;
+ /* Media event */
+ struct spdk_bdev_media_event event;
+};
+
+typedef void (*ftl_restore_fn)(struct ftl_restore *, int, void *cb_arg);
+
+void ftl_apply_limits(struct spdk_ftl_dev *dev);
+void ftl_io_read(struct ftl_io *io);
+void ftl_io_write(struct ftl_io *io);
+int ftl_flush_wbuf(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg);
+int ftl_current_limit(const struct spdk_ftl_dev *dev);
+int ftl_invalidate_addr(struct spdk_ftl_dev *dev, struct ftl_addr addr);
+int ftl_task_core(void *ctx);
+int ftl_task_read(void *ctx);
+void ftl_process_anm_event(struct ftl_anm_event *event);
+size_t ftl_tail_md_num_blocks(const struct spdk_ftl_dev *dev);
+size_t ftl_tail_md_hdr_num_blocks(void);
+size_t ftl_vld_map_num_blocks(const struct spdk_ftl_dev *dev);
+size_t ftl_lba_map_num_blocks(const struct spdk_ftl_dev *dev);
+size_t ftl_head_md_num_blocks(const struct spdk_ftl_dev *dev);
+int ftl_restore_md(struct spdk_ftl_dev *dev, ftl_restore_fn cb, void *cb_arg);
+int ftl_restore_device(struct ftl_restore *restore, ftl_restore_fn cb, void *cb_arg);
+void ftl_restore_nv_cache(struct ftl_restore *restore, ftl_restore_fn cb, void *cb_arg);
+int ftl_band_set_direct_access(struct ftl_band *band, bool access);
+bool ftl_addr_is_written(struct ftl_band *band, struct ftl_addr addr);
+int ftl_flush_active_bands(struct spdk_ftl_dev *dev, spdk_ftl_fn cb_fn, void *cb_arg);
+int ftl_nv_cache_write_header(struct ftl_nv_cache *nv_cache, bool shutdown,
+ spdk_bdev_io_completion_cb cb_fn, void *cb_arg);
+int ftl_nv_cache_scrub(struct ftl_nv_cache *nv_cache, spdk_bdev_io_completion_cb cb_fn,
+ void *cb_arg);
+void ftl_get_media_events(struct spdk_ftl_dev *dev);
+int ftl_io_channel_poll(void *arg);
+void ftl_evict_cache_entry(struct spdk_ftl_dev *dev, struct ftl_wbuf_entry *entry);
+struct spdk_io_channel *ftl_get_io_channel(const struct spdk_ftl_dev *dev);
+struct ftl_io_channel *ftl_io_channel_get_ctx(struct spdk_io_channel *ioch);
+
+
+#define ftl_to_addr(address) \
+ (struct ftl_addr) { .offset = (uint64_t)(address) }
+
+#define ftl_to_addr_packed(address) \
+ (struct ftl_addr) { .pack.offset = (uint32_t)(address) }
+
+static inline struct spdk_thread *
+ftl_get_core_thread(const struct spdk_ftl_dev *dev)
+{
+ return dev->core_thread;
+}
+
+static inline size_t
+ftl_get_num_bands(const struct spdk_ftl_dev *dev)
+{
+ return dev->num_bands;
+}
+
+static inline size_t
+ftl_get_num_punits(const struct spdk_ftl_dev *dev)
+{
+ return spdk_bdev_get_optimal_open_zones(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
+}
+
+static inline size_t
+ftl_get_num_zones(const struct spdk_ftl_dev *dev)
+{
+ return ftl_get_num_bands(dev) * ftl_get_num_punits(dev);
+}
+
+static inline size_t
+ftl_get_num_blocks_in_zone(const struct spdk_ftl_dev *dev)
+{
+ return spdk_bdev_get_zone_size(spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
+}
+
+static inline uint64_t
+ftl_get_num_blocks_in_band(const struct spdk_ftl_dev *dev)
+{
+ return ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev);
+}
+
+static inline uint64_t
+ftl_addr_get_zone_slba(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+	return addr.offset - (addr.offset % ftl_get_num_blocks_in_zone(dev));
+}
+
+static inline uint64_t
+ftl_addr_get_band(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+ return addr.offset / ftl_get_num_blocks_in_band(dev);
+}
+
+static inline uint64_t
+ftl_addr_get_punit(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+ return (addr.offset / ftl_get_num_blocks_in_zone(dev)) % ftl_get_num_punits(dev);
+}
+
+static inline uint64_t
+ftl_addr_get_zone_offset(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+ return addr.offset % ftl_get_num_blocks_in_zone(dev);
+}
+
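+/* The helpers above decompose a physical block offset as
+ *   offset = band * blocks_in_band + punit * blocks_in_zone + zone_offset,
+ * where blocks_in_band = num_punits * blocks_in_zone. Worked example with a
+ * hypothetical geometry of 8 punits and 1024-block zones (8192-block bands):
+ * offset 10000 maps to band 1, punit 1 and zone offset 784.
+ */
+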
+static inline size_t
+ftl_vld_map_size(const struct spdk_ftl_dev *dev)
+{
+ return (size_t)spdk_divide_round_up(ftl_get_num_blocks_in_band(dev), CHAR_BIT);
+}
+
+static inline int
+ftl_addr_packed(const struct spdk_ftl_dev *dev)
+{
+ return dev->addr_len < 32;
+}
+
+static inline void
+ftl_l2p_lba_persist(const struct spdk_ftl_dev *dev, uint64_t lba)
+{
+#ifdef SPDK_CONFIG_PMDK
+ size_t ftl_addr_size = ftl_addr_packed(dev) ? 4 : 8;
+ pmem_persist((char *)dev->l2p + (lba * ftl_addr_size), ftl_addr_size);
+#else /* SPDK_CONFIG_PMDK */
+ SPDK_ERRLOG("Libpmem not available, cannot flush l2p to pmem\n");
+ assert(0);
+#endif /* SPDK_CONFIG_PMDK */
+}
+
+static inline int
+ftl_addr_invalid(struct ftl_addr addr)
+{
+ return addr.offset == ftl_to_addr(FTL_ADDR_INVALID).offset;
+}
+
+static inline int
+ftl_addr_cached(struct ftl_addr addr)
+{
+ return !ftl_addr_invalid(addr) && addr.cached;
+}
+
+static inline struct ftl_addr
+ftl_addr_to_packed(const struct spdk_ftl_dev *dev, struct ftl_addr addr)
+{
+ struct ftl_addr p = {};
+
+ if (ftl_addr_invalid(addr)) {
+ p = ftl_to_addr_packed(FTL_ADDR_INVALID);
+ } else if (ftl_addr_cached(addr)) {
+ p.pack.cached = 1;
+ p.pack.cache_offset = (uint32_t) addr.cache_offset;
+ } else {
+ p.pack.offset = (uint32_t) addr.offset;
+ }
+
+ return p;
+}
+
+static inline struct ftl_addr
+ftl_addr_from_packed(const struct spdk_ftl_dev *dev, struct ftl_addr p)
+{
+ struct ftl_addr addr = {};
+
+ if (p.pack.offset == (uint32_t)FTL_ADDR_INVALID) {
+ addr = ftl_to_addr(FTL_ADDR_INVALID);
+ } else if (p.pack.cached) {
+ addr.cached = 1;
+ addr.cache_offset = p.pack.cache_offset;
+ } else {
+ addr = p;
+ }
+
+ return addr;
+}
+
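+/* L2P entries come in two widths: devices addressable in fewer than 32 bits
+ * (ftl_addr_packed()) store 4-byte packed entries using the 'pack' bitfield,
+ * while larger devices keep the full 8-byte ftl_addr. The conversion helpers
+ * above round-trip between the two forms, preserving the cached flag / cache
+ * offset and mapping FTL_ADDR_INVALID in both directions.
+ */
+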
+#define _ftl_l2p_set(l2p, off, val, bits) \
+ __atomic_store_n(((uint##bits##_t *)(l2p)) + (off), val, __ATOMIC_SEQ_CST)
+
+#define _ftl_l2p_set32(l2p, off, val) \
+ _ftl_l2p_set(l2p, off, val, 32)
+
+#define _ftl_l2p_set64(l2p, off, val) \
+ _ftl_l2p_set(l2p, off, val, 64)
+
+#define _ftl_l2p_get(l2p, off, bits) \
+ __atomic_load_n(((uint##bits##_t *)(l2p)) + (off), __ATOMIC_SEQ_CST)
+
+#define _ftl_l2p_get32(l2p, off) \
+ _ftl_l2p_get(l2p, off, 32)
+
+#define _ftl_l2p_get64(l2p, off) \
+ _ftl_l2p_get(l2p, off, 64)
+
+#define ftl_addr_cmp(p1, p2) \
+ ((p1).offset == (p2).offset)
+
+static inline void
+ftl_l2p_set(struct spdk_ftl_dev *dev, uint64_t lba, struct ftl_addr addr)
+{
+ assert(dev->num_lbas > lba);
+
+ if (ftl_addr_packed(dev)) {
+ _ftl_l2p_set32(dev->l2p, lba, ftl_addr_to_packed(dev, addr).offset);
+ } else {
+ _ftl_l2p_set64(dev->l2p, lba, addr.offset);
+ }
+
+ if (dev->l2p_pmem_len != 0) {
+ ftl_l2p_lba_persist(dev, lba);
+ }
+}
+
+static inline struct ftl_addr
+ftl_l2p_get(struct spdk_ftl_dev *dev, uint64_t lba)
+{
+ assert(dev->num_lbas > lba);
+
+ if (ftl_addr_packed(dev)) {
+ return ftl_addr_from_packed(dev, ftl_to_addr_packed(
+ _ftl_l2p_get32(dev->l2p, lba)));
+ } else {
+ return ftl_to_addr(_ftl_l2p_get64(dev->l2p, lba));
+ }
+}
+
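+/* Usage sketch (lba must be below dev->num_lbas):
+ *   ftl_l2p_set(dev, lba, addr);                  - atomic 4- or 8-byte store
+ *   struct ftl_addr cur = ftl_l2p_get(dev, lba);  - atomic load
+ * When the table is backed by pmem (l2p_pmem_len != 0), ftl_l2p_set() also
+ * persists the updated entry via ftl_l2p_lba_persist().
+ */
+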
+static inline bool
+ftl_dev_has_nv_cache(const struct spdk_ftl_dev *dev)
+{
+ return dev->nv_cache.bdev_desc != NULL;
+}
+
+#define FTL_NV_CACHE_HEADER_VERSION (1)
+#define FTL_NV_CACHE_DATA_OFFSET (1)
+#define FTL_NV_CACHE_PHASE_OFFSET (62)
+#define FTL_NV_CACHE_PHASE_COUNT (4)
+#define FTL_NV_CACHE_PHASE_MASK (3ULL << FTL_NV_CACHE_PHASE_OFFSET)
+#define FTL_NV_CACHE_LBA_INVALID (FTL_LBA_INVALID & ~FTL_NV_CACHE_PHASE_MASK)
+
+static inline bool
+ftl_nv_cache_phase_is_valid(unsigned int phase)
+{
+ return phase > 0 && phase <= 3;
+}
+
+static inline unsigned int
+ftl_nv_cache_next_phase(unsigned int current)
+{
+ static const unsigned int phases[] = { 0, 2, 3, 1 };
+ assert(ftl_nv_cache_phase_is_valid(current));
+ return phases[current];
+}
+
+static inline unsigned int
+ftl_nv_cache_prev_phase(unsigned int current)
+{
+ static const unsigned int phases[] = { 0, 3, 1, 2 };
+ assert(ftl_nv_cache_phase_is_valid(current));
+ return phases[current];
+}
+
+static inline uint64_t
+ftl_nv_cache_pack_lba(uint64_t lba, unsigned int phase)
+{
+ assert(ftl_nv_cache_phase_is_valid(phase));
+ return (lba & ~FTL_NV_CACHE_PHASE_MASK) | ((uint64_t)phase << FTL_NV_CACHE_PHASE_OFFSET);
+}
+
+static inline void
+ftl_nv_cache_unpack_lba(uint64_t in_lba, uint64_t *out_lba, unsigned int *phase)
+{
+ *out_lba = in_lba & ~FTL_NV_CACHE_PHASE_MASK;
+ *phase = (in_lba & FTL_NV_CACHE_PHASE_MASK) >> FTL_NV_CACHE_PHASE_OFFSET;
+
+ /* If the phase is invalid the block wasn't written yet, so treat the LBA as invalid too */
+ if (!ftl_nv_cache_phase_is_valid(*phase) || *out_lba == FTL_NV_CACHE_LBA_INVALID) {
+ *out_lba = FTL_LBA_INVALID;
+ }
+}
+
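+/* The phase lives in bits 62-63 of the LBA stored alongside each cache block
+ * and cycles 1 -> 2 -> 3 -> 1 (see ftl_nv_cache_next_phase()). For example,
+ * ftl_nv_cache_pack_lba(0x1234, 2) yields 0x1234 | (2ULL << 62), and
+ * ftl_nv_cache_unpack_lba() recovers lba 0x1234 and phase 2; an invalid phase
+ * makes the LBA come back as FTL_LBA_INVALID.
+ */
+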
+static inline bool
+ftl_is_append_supported(const struct spdk_ftl_dev *dev)
+{
+ return dev->conf.use_append;
+}
+
+#endif /* FTL_CORE_H */
diff --git a/src/spdk/lib/ftl/ftl_debug.c b/src/spdk/lib/ftl/ftl_debug.c
new file mode 100644
index 000000000..9fbb43810
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_debug.c
@@ -0,0 +1,169 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk_internal/log.h"
+#include "spdk/ftl.h"
+#include "ftl_debug.h"
+#include "ftl_band.h"
+
+#if defined(DEBUG)
+#if defined(FTL_META_DEBUG)
+
+static const char *ftl_band_state_str[] = {
+ "free",
+ "prep",
+ "opening",
+ "open",
+ "full",
+ "closing",
+ "closed",
+ "max"
+};
+
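+/* Cross-check the band's in-memory LBA map segments against the L2P: every
+ * block marked valid must either still be addressed by the L2P at the same
+ * physical offset or have been rewritten into the write buffer (cached entry).
+ */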
+bool
+ftl_band_validate_md(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_lba_map *lba_map = &band->lba_map;
+ struct ftl_addr addr_md, addr_l2p;
+ size_t i, size, seg_off;
+ bool valid = true;
+
+ size = ftl_get_num_blocks_in_band(dev);
+
+ pthread_spin_lock(&lba_map->lock);
+ for (i = 0; i < size; ++i) {
+ if (!spdk_bit_array_get(lba_map->vld, i)) {
+ continue;
+ }
+
+ seg_off = i / FTL_NUM_LBA_IN_BLOCK;
+ if (lba_map->segments[seg_off] != FTL_LBA_MAP_SEG_CACHED) {
+ continue;
+ }
+
+ addr_md = ftl_band_addr_from_block_offset(band, i);
+ addr_l2p = ftl_l2p_get(dev, lba_map->map[i]);
+
+ if (addr_l2p.cached) {
+ continue;
+ }
+
+ if (addr_l2p.offset != addr_md.offset) {
+ valid = false;
+ break;
+ }
+
+ }
+
+ pthread_spin_unlock(&lba_map->lock);
+
+ return valid;
+}
+
+void
+ftl_dev_dump_bands(struct spdk_ftl_dev *dev)
+{
+ size_t i, total = 0;
+
+ if (!dev->bands) {
+ return;
+ }
+
+ ftl_debug("Bands validity:\n");
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ if (dev->bands[i].state == FTL_BAND_STATE_FREE &&
+ dev->bands[i].wr_cnt == 0) {
+ continue;
+ }
+
+ if (!dev->bands[i].num_zones) {
+ ftl_debug(" Band %3zu: all zones are offline\n", i + 1);
+ continue;
+ }
+
+ total += dev->bands[i].lba_map.num_vld;
+ ftl_debug(" Band %3zu: %8zu / %zu \tnum_zones: %zu \twr_cnt: %"PRIu64"\tmerit:"
+ "%10.3f\tstate: %s\n",
+ i + 1, dev->bands[i].lba_map.num_vld,
+ ftl_band_user_blocks(&dev->bands[i]),
+ dev->bands[i].num_zones,
+ dev->bands[i].wr_cnt,
+ dev->bands[i].merit,
+ ftl_band_state_str[dev->bands[i].state]);
+ }
+}
+
+#endif /* defined(FTL_META_DEBUG) */
+
+#if defined(FTL_DUMP_STATS)
+
+void
+ftl_dev_dump_stats(const struct spdk_ftl_dev *dev)
+{
+ size_t i, total = 0;
+ char uuid[SPDK_UUID_STRING_LEN];
+ double waf;
+ const char *limits[] = {
+ [SPDK_FTL_LIMIT_CRIT] = "crit",
+ [SPDK_FTL_LIMIT_HIGH] = "high",
+ [SPDK_FTL_LIMIT_LOW] = "low",
+ [SPDK_FTL_LIMIT_START] = "start"
+ };
+
+ if (!dev->bands) {
+ return;
+ }
+
+ /* Count the number of valid LBAs */
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ total += dev->bands[i].lba_map.num_vld;
+ }
+
+ waf = (double)dev->stats.write_total / (double)dev->stats.write_user;
+
+ spdk_uuid_fmt_lower(uuid, sizeof(uuid), &dev->uuid);
+ ftl_debug("\n");
+ ftl_debug("device UUID: %s\n", uuid);
+ ftl_debug("total valid LBAs: %zu\n", total);
+ ftl_debug("total writes: %"PRIu64"\n", dev->stats.write_total);
+ ftl_debug("user writes: %"PRIu64"\n", dev->stats.write_user);
+ ftl_debug("WAF: %.4lf\n", waf);
+ ftl_debug("limits:\n");
+ for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
+ ftl_debug(" %5s: %"PRIu64"\n", limits[i], dev->stats.limits[i]);
+ }
+}
+
+#endif /* defined(FTL_DUMP_STATS) */
+#endif /* defined(DEBUG) */
diff --git a/src/spdk/lib/ftl/ftl_debug.h b/src/spdk/lib/ftl/ftl_debug.h
new file mode 100644
index 000000000..c90c92ef2
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_debug.h
@@ -0,0 +1,73 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FTL_DEBUG_H
+#define FTL_DEBUG_H
+
+#include "ftl_addr.h"
+#include "ftl_band.h"
+#include "ftl_core.h"
+
+#if defined(DEBUG)
+/* Debug flags - enabled when defined */
+#define FTL_META_DEBUG 1
+#define FTL_DUMP_STATS 1
+
+#define ftl_debug(msg, ...) \
+ SPDK_ERRLOG(msg, ## __VA_ARGS__)
+#else
+#define ftl_debug(msg, ...)
+#endif
+
+static inline const char *
+ftl_addr2str(struct ftl_addr addr, char *buf, size_t size)
+{
+ snprintf(buf, size, "(%"PRIu64")", addr.offset);
+ return buf;
+}
+
+#if defined(FTL_META_DEBUG)
+bool ftl_band_validate_md(struct ftl_band *band);
+void ftl_dev_dump_bands(struct spdk_ftl_dev *dev);
+#else
+#define ftl_band_validate_md(band)
+#define ftl_dev_dump_bands(dev)
+#endif
+
+#if defined(FTL_DUMP_STATS)
+void ftl_dev_dump_stats(const struct spdk_ftl_dev *dev);
+#else
+#define ftl_dev_dump_stats(dev)
+#endif
+
+#endif /* FTL_DEBUG_H */
diff --git a/src/spdk/lib/ftl/ftl_init.c b/src/spdk/lib/ftl/ftl_init.c
new file mode 100644
index 000000000..15a8c21c9
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_init.c
@@ -0,0 +1,1688 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/nvme.h"
+#include "spdk/thread.h"
+#include "spdk/string.h"
+#include "spdk/likely.h"
+#include "spdk_internal/log.h"
+#include "spdk/ftl.h"
+#include "spdk/likely.h"
+#include "spdk/string.h"
+#include "spdk/bdev_zone.h"
+#include "spdk/bdev_module.h"
+#include "spdk/config.h"
+
+#include "ftl_core.h"
+#include "ftl_io.h"
+#include "ftl_reloc.h"
+#include "ftl_band.h"
+#include "ftl_debug.h"
+
+#ifdef SPDK_CONFIG_PMDK
+#include "libpmem.h"
+#endif /* SPDK_CONFIG_PMDK */
+
+#define FTL_CORE_RING_SIZE 4096
+#define FTL_INIT_TIMEOUT 30
+#define FTL_NSID 1
+#define FTL_ZONE_INFO_COUNT 64
+
+/* Dummy bdev module used to claim bdevs. */
+static struct spdk_bdev_module g_ftl_bdev_module = {
+ .name = "ftl_lib",
+};
+
+struct ftl_dev_init_ctx {
+ /* Owner */
+ struct spdk_ftl_dev *dev;
+ /* Initial arguments */
+ struct spdk_ftl_dev_init_opts opts;
+ /* IO channel for zone info retrieving */
+ struct spdk_io_channel *ioch;
+ /* Buffer for reading zone info */
+ struct spdk_bdev_zone_info info[FTL_ZONE_INFO_COUNT];
+ /* Currently read zone */
+ size_t zone_id;
+ /* User's callback */
+ spdk_ftl_init_fn cb_fn;
+ /* Callback's argument */
+ void *cb_arg;
+ /* Thread to call the callback on */
+ struct spdk_thread *thread;
+ /* Poller to check if the device has been destroyed/initialized */
+ struct spdk_poller *poller;
+ /* Status to return for halt completion callback */
+ int halt_complete_status;
+};
+
+static STAILQ_HEAD(, spdk_ftl_dev) g_ftl_queue = STAILQ_HEAD_INITIALIZER(g_ftl_queue);
+static pthread_mutex_t g_ftl_queue_lock = PTHREAD_MUTEX_INITIALIZER;
+static const struct spdk_ftl_conf g_default_conf = {
+ .limits = {
+ /* 5 free bands / 0 % host writes */
+ [SPDK_FTL_LIMIT_CRIT] = { .thld = 5, .limit = 0 },
+ /* 10 free bands / 5 % host writes */
+ [SPDK_FTL_LIMIT_HIGH] = { .thld = 10, .limit = 5 },
+ /* 20 free bands / 40 % host writes */
+ [SPDK_FTL_LIMIT_LOW] = { .thld = 20, .limit = 40 },
+ /* 40 free bands / 100 % host writes - defrag starts running */
+ [SPDK_FTL_LIMIT_START] = { .thld = 40, .limit = 100 },
+ },
+ /* 10 percent valid blocks */
+ .invalid_thld = 10,
+ /* 20% spare blocks */
+ .lba_rsvd = 20,
+ /* 6M write buffer per each IO channel */
+ .write_buffer_size = 6 * 1024 * 1024,
+ /* 90% band fill threshold */
+ .band_thld = 90,
+ /* Max 32 IO depth per band relocate */
+ .max_reloc_qdepth = 32,
+ /* Max 3 active band relocates */
+ .max_active_relocs = 3,
+ /* IO pool size per user thread (this should be adjusted to thread IO qdepth) */
+ .user_io_pool_size = 2048,
+	/*
+	 * If clear, ftl will return an error when restoring after a dirty shutdown.
+	 * If set, the last band will be padded and ftl will restore based only on
+	 * closed bands - this will result in lost data after recovery.
+	 */
+ .allow_open_bands = false,
+ .max_io_channels = 128,
+ .nv_cache = {
+ /* Maximum number of concurrent requests */
+ .max_request_cnt = 2048,
+ /* Maximum number of blocks per request */
+ .max_request_size = 16,
+ }
+};
+
+static int
+ftl_band_init_md(struct ftl_band *band)
+{
+ struct ftl_lba_map *lba_map = &band->lba_map;
+ int rc;
+
+ lba_map->vld = spdk_bit_array_create(ftl_get_num_blocks_in_band(band->dev));
+ if (!lba_map->vld) {
+ return -ENOMEM;
+ }
+
+ rc = pthread_spin_init(&lba_map->lock, PTHREAD_PROCESS_PRIVATE);
+ if (rc) {
+ spdk_bit_array_free(&lba_map->vld);
+ return rc;
+ }
+ ftl_band_md_clear(band);
+ return 0;
+}
+
+static int
+ftl_check_conf(const struct spdk_ftl_dev *dev, const struct spdk_ftl_conf *conf)
+{
+ size_t i;
+
+ if (conf->invalid_thld >= 100) {
+ return -1;
+ }
+ if (conf->lba_rsvd >= 100) {
+ return -1;
+ }
+ if (conf->lba_rsvd == 0) {
+ return -1;
+ }
+ if (conf->write_buffer_size == 0) {
+ return -1;
+ }
+ if (conf->write_buffer_size % FTL_BLOCK_SIZE != 0) {
+ return -1;
+ }
+
+ for (i = 0; i < SPDK_FTL_LIMIT_MAX; ++i) {
+ if (conf->limits[i].limit > 100) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+ftl_dev_init_bands(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band, *pband;
+ unsigned int i;
+ int rc = 0;
+
+ LIST_INIT(&dev->free_bands);
+ LIST_INIT(&dev->shut_bands);
+
+ dev->num_free = 0;
+ dev->bands = calloc(ftl_get_num_bands(dev), sizeof(*dev->bands));
+ if (!dev->bands) {
+ return -1;
+ }
+
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ band = &dev->bands[i];
+ band->id = i;
+ band->dev = dev;
+ band->state = FTL_BAND_STATE_CLOSED;
+
+ if (LIST_EMPTY(&dev->shut_bands)) {
+ LIST_INSERT_HEAD(&dev->shut_bands, band, list_entry);
+ } else {
+ LIST_INSERT_AFTER(pband, band, list_entry);
+ }
+ pband = band;
+
+ CIRCLEQ_INIT(&band->zones);
+ band->zone_buf = calloc(ftl_get_num_punits(dev), sizeof(*band->zone_buf));
+ if (!band->zone_buf) {
+ SPDK_ERRLOG("Failed to allocate block state table for band: [%u]\n", i);
+ rc = -1;
+ break;
+ }
+
+ rc = ftl_band_init_md(band);
+ if (rc) {
+ SPDK_ERRLOG("Failed to initialize metadata structures for band [%u]\n", i);
+ break;
+ }
+
+ band->reloc_bitmap = spdk_bit_array_create(ftl_get_num_bands(dev));
+ if (!band->reloc_bitmap) {
+ SPDK_ERRLOG("Failed to allocate band relocation bitmap\n");
+ break;
+ }
+ }
+
+ return rc;
+}
+
+static void
+ftl_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
+{
+ struct spdk_ftl_dev *dev = event_ctx;
+
+ switch (type) {
+ case SPDK_BDEV_EVENT_REMOVE:
+ assert(0);
+ break;
+ case SPDK_BDEV_EVENT_MEDIA_MANAGEMENT:
+ assert(bdev == spdk_bdev_desc_get_bdev(dev->base_bdev_desc));
+		ftl_get_media_events(dev);
+		break;
+	default:
+ break;
+ }
+}
+
+static int
+ftl_dev_init_nv_cache(struct spdk_ftl_dev *dev, const char *bdev_name)
+{
+ struct spdk_bdev *bdev;
+ struct spdk_ftl_conf *conf = &dev->conf;
+ struct ftl_nv_cache *nv_cache = &dev->nv_cache;
+ char pool_name[128];
+ int rc;
+
+ if (!bdev_name) {
+ return 0;
+ }
+
+ bdev = spdk_bdev_get_by_name(bdev_name);
+ if (!bdev) {
+ SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
+ return -1;
+ }
+
+ if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
+ dev, &nv_cache->bdev_desc)) {
+ SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
+ return -1;
+ }
+
+ if (spdk_bdev_module_claim_bdev(bdev, nv_cache->bdev_desc, &g_ftl_bdev_module)) {
+ spdk_bdev_close(nv_cache->bdev_desc);
+ nv_cache->bdev_desc = NULL;
+ SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
+ return -1;
+ }
+
+ SPDK_INFOLOG(SPDK_LOG_FTL_INIT, "Using %s as write buffer cache\n",
+ spdk_bdev_get_name(bdev));
+
+ if (spdk_bdev_get_block_size(bdev) != FTL_BLOCK_SIZE) {
+ SPDK_ERRLOG("Unsupported block size (%d)\n", spdk_bdev_get_block_size(bdev));
+ return -1;
+ }
+
+ if (!spdk_bdev_is_md_separate(bdev)) {
+ SPDK_ERRLOG("Bdev %s doesn't support separate metadata buffer IO\n",
+ spdk_bdev_get_name(bdev));
+ return -1;
+ }
+
+ if (spdk_bdev_get_md_size(bdev) < sizeof(uint64_t)) {
+ SPDK_ERRLOG("Bdev's %s metadata is too small (%"PRIu32")\n",
+ spdk_bdev_get_name(bdev), spdk_bdev_get_md_size(bdev));
+ return -1;
+ }
+
+ if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
+ SPDK_ERRLOG("Unsupported DIF type used by bdev %s\n",
+ spdk_bdev_get_name(bdev));
+ return -1;
+ }
+
+ /* The cache needs to be capable of storing at least two full bands. This requirement comes
+ * from the fact that cache works as a protection against power loss, so before the data
+ * inside the cache can be overwritten, the band it's stored on has to be closed. Plus one
+ * extra block is needed to store the header.
+ */
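+	/* Example with a hypothetical 4096-block band: the cache bdev must provide
+	 * at least 2 * 4096 + 1 = 8193 blocks.
+	 */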
+ if (spdk_bdev_get_num_blocks(bdev) < ftl_get_num_blocks_in_band(dev) * 2 + 1) {
+ SPDK_ERRLOG("Insufficient number of blocks for write buffer cache (available: %"
+ PRIu64", required: %"PRIu64")\n", spdk_bdev_get_num_blocks(bdev),
+ ftl_get_num_blocks_in_band(dev) * 2 + 1);
+ return -1;
+ }
+
+ rc = snprintf(pool_name, sizeof(pool_name), "ftl-nvpool-%p", dev);
+	if (rc < 0 || rc >= (int)sizeof(pool_name)) {
+ return -1;
+ }
+
+ nv_cache->md_pool = spdk_mempool_create(pool_name, conf->nv_cache.max_request_cnt,
+ spdk_bdev_get_md_size(bdev) *
+ conf->nv_cache.max_request_size,
+ SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (!nv_cache->md_pool) {
+ SPDK_ERRLOG("Failed to initialize non-volatile cache metadata pool\n");
+ return -1;
+ }
+
+ nv_cache->dma_buf = spdk_dma_zmalloc(FTL_BLOCK_SIZE, spdk_bdev_get_buf_align(bdev), NULL);
+ if (!nv_cache->dma_buf) {
+ SPDK_ERRLOG("Memory allocation failure\n");
+ return -1;
+ }
+
+ if (pthread_spin_init(&nv_cache->lock, PTHREAD_PROCESS_PRIVATE)) {
+ SPDK_ERRLOG("Failed to initialize cache lock\n");
+ return -1;
+ }
+
+ nv_cache->current_addr = FTL_NV_CACHE_DATA_OFFSET;
+ nv_cache->num_data_blocks = spdk_bdev_get_num_blocks(bdev) - 1;
+ nv_cache->num_available = nv_cache->num_data_blocks;
+ nv_cache->ready = false;
+
+ return 0;
+}
+
+void
+spdk_ftl_conf_init_defaults(struct spdk_ftl_conf *conf)
+{
+ *conf = g_default_conf;
+}
+
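+/* Typical usage (sketch): callers fetch the defaults and override selected
+ * fields before passing the configuration in through the init options, e.g.:
+ *
+ *	struct spdk_ftl_conf conf;
+ *	spdk_ftl_conf_init_defaults(&conf);
+ *	conf.lba_rsvd = 25;	// reserve 25% of blocks instead of the default 20%
+ *	opts.conf = &conf;
+ */
+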
+static void
+ftl_lba_map_request_ctor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
+{
+ struct ftl_lba_map_request *request = obj;
+ struct spdk_ftl_dev *dev = opaque;
+
+ request->segments = spdk_bit_array_create(spdk_divide_round_up(
+ ftl_get_num_blocks_in_band(dev), FTL_NUM_LBA_IN_BLOCK));
+}
+
+static int
+ftl_init_media_events_pool(struct spdk_ftl_dev *dev)
+{
+ char pool_name[128];
+ int rc;
+
+ rc = snprintf(pool_name, sizeof(pool_name), "ftl-media-%p", dev);
+ if (rc < 0 || rc >= (int)sizeof(pool_name)) {
+ SPDK_ERRLOG("Failed to create media pool name\n");
+ return -1;
+ }
+
+ dev->media_events_pool = spdk_mempool_create(pool_name, 1024,
+ sizeof(struct ftl_media_event),
+ SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (!dev->media_events_pool) {
+ SPDK_ERRLOG("Failed to create media events pool\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+ftl_init_lba_map_pools(struct spdk_ftl_dev *dev)
+{
+#define POOL_NAME_LEN 128
+ char pool_name[POOL_NAME_LEN];
+ int rc;
+
+ rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lba-pool");
+ if (rc < 0 || rc >= POOL_NAME_LEN) {
+ return -ENAMETOOLONG;
+ }
+
+ /* We need to reserve at least 2 buffers for band close / open sequence
+ * alone, plus additional (8) buffers for handling write errors.
+	 * TODO: This memory pool is utilized only by the core thread - it introduces
+	 * unnecessary overhead and should be replaced by a different data structure.
+ */
+ dev->lba_pool = spdk_mempool_create(pool_name, 2 + 8,
+ ftl_lba_map_pool_elem_size(dev),
+ SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (!dev->lba_pool) {
+ return -ENOMEM;
+ }
+
+ rc = snprintf(pool_name, sizeof(pool_name), "%s-%s", dev->name, "ftl-lbareq-pool");
+ if (rc < 0 || rc >= POOL_NAME_LEN) {
+ return -ENAMETOOLONG;
+ }
+
+ dev->lba_request_pool = spdk_mempool_create_ctor(pool_name,
+ dev->conf.max_reloc_qdepth * dev->conf.max_active_relocs,
+ sizeof(struct ftl_lba_map_request),
+ SPDK_MEMPOOL_DEFAULT_CACHE_SIZE,
+ SPDK_ENV_SOCKET_ID_ANY,
+ ftl_lba_map_request_ctor,
+ dev);
+ if (!dev->lba_request_pool) {
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+ftl_init_wptr_list(struct spdk_ftl_dev *dev)
+{
+ LIST_INIT(&dev->wptr_list);
+ LIST_INIT(&dev->flush_list);
+ LIST_INIT(&dev->band_flush_list);
+}
+
+static size_t
+ftl_dev_band_max_seq(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band;
+ size_t seq = 0;
+
+ LIST_FOREACH(band, &dev->shut_bands, list_entry) {
+ if (band->seq > seq) {
+ seq = band->seq;
+ }
+ }
+
+ return seq;
+}
+
+static void
+_ftl_init_bands_state(void *ctx)
+{
+ struct ftl_band *band, *temp_band;
+ struct spdk_ftl_dev *dev = ctx;
+
+ dev->seq = ftl_dev_band_max_seq(dev);
+
+ LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
+ if (!band->lba_map.num_vld) {
+ ftl_band_set_state(band, FTL_BAND_STATE_FREE);
+ }
+ }
+
+ ftl_reloc_resume(dev->reloc);
+ /* Clear the limit applications as they're incremented incorrectly by */
+ /* the initialization code */
+ memset(dev->stats.limits, 0, sizeof(dev->stats.limits));
+}
+
+static int
+ftl_init_num_free_bands(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band;
+ int cnt = 0;
+
+ LIST_FOREACH(band, &dev->shut_bands, list_entry) {
+ if (band->num_zones && !band->lba_map.num_vld) {
+ cnt++;
+ }
+ }
+ return cnt;
+}
+
+static int
+ftl_init_bands_state(struct spdk_ftl_dev *dev)
+{
+	/* TODO: Should we abort initialization or expose a read-only device */
+	/* if there are no free bands? */
+	/* If we abort initialization, should that depend on having no free */
+	/* bands at all, or on having fewer than some minimal number of */
+	/* free bands? */
+ if (!ftl_init_num_free_bands(dev)) {
+ return -1;
+ }
+
+ spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_init_bands_state, dev);
+ return 0;
+}
+
+static void
+_ftl_dev_init_core_thread(void *ctx)
+{
+ struct spdk_ftl_dev *dev = ctx;
+
+ dev->core_poller = SPDK_POLLER_REGISTER(ftl_task_core, dev, 0);
+ if (!dev->core_poller) {
+ SPDK_ERRLOG("Unable to register core poller\n");
+ assert(0);
+ }
+
+ dev->ioch = spdk_get_io_channel(dev);
+}
+
+static int
+ftl_dev_init_core_thread(struct spdk_ftl_dev *dev, const struct spdk_ftl_dev_init_opts *opts)
+{
+ if (!opts->core_thread) {
+ return -1;
+ }
+
+ dev->core_thread = opts->core_thread;
+
+ spdk_thread_send_msg(opts->core_thread, _ftl_dev_init_core_thread, dev);
+ return 0;
+}
+
+static int
+ftl_dev_l2p_alloc_pmem(struct spdk_ftl_dev *dev, size_t l2p_size, const char *l2p_path)
+{
+#ifdef SPDK_CONFIG_PMDK
+ int is_pmem;
+
+ if ((dev->l2p = pmem_map_file(l2p_path, 0,
+ 0, 0, &dev->l2p_pmem_len, &is_pmem)) == NULL) {
+ SPDK_ERRLOG("Failed to mmap l2p_path\n");
+ return -1;
+ }
+
+ if (!is_pmem) {
+ SPDK_NOTICELOG("l2p_path mapped on non-pmem device\n");
+ }
+
+ if (dev->l2p_pmem_len < l2p_size) {
+ SPDK_ERRLOG("l2p_path file is too small\n");
+ return -1;
+ }
+
+ pmem_memset_persist(dev->l2p, FTL_ADDR_INVALID, l2p_size);
+
+ return 0;
+#else /* SPDK_CONFIG_PMDK */
+ SPDK_ERRLOG("Libpmem not available, cannot use pmem l2p_path\n");
+ return -1;
+#endif /* SPDK_CONFIG_PMDK */
+}
+
+static int
+ftl_dev_l2p_alloc_dram(struct spdk_ftl_dev *dev, size_t l2p_size)
+{
+ dev->l2p = malloc(l2p_size);
+ if (!dev->l2p) {
+ SPDK_ERRLOG("Failed to allocate l2p table\n");
+ return -1;
+ }
+
+ memset(dev->l2p, FTL_ADDR_INVALID, l2p_size);
+
+ return 0;
+}
+
+static int
+ftl_dev_l2p_alloc(struct spdk_ftl_dev *dev)
+{
+ size_t addr_size = dev->addr_len >= 32 ? 8 : 4;
+ size_t l2p_size = dev->num_lbas * addr_size;
+ const char *l2p_path = dev->conf.l2p_path;
+
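+	/* Each L2P entry takes 4 bytes when addresses fit in 32 bits and 8 bytes
+	 * otherwise; e.g. a hypothetical device exposing 100M user LBAs needs
+	 * roughly 400 MB or 800 MB for the table, in DRAM or in the pmem file.
+	 */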
+ if (dev->num_lbas == 0) {
+ SPDK_ERRLOG("Invalid l2p table size\n");
+ return -1;
+ }
+
+ if (dev->l2p) {
+ SPDK_ERRLOG("L2p table already allocated\n");
+ return -1;
+ }
+
+ dev->l2p_pmem_len = 0;
+ if (l2p_path) {
+ return ftl_dev_l2p_alloc_pmem(dev, l2p_size, l2p_path);
+ } else {
+ return ftl_dev_l2p_alloc_dram(dev, l2p_size);
+ }
+}
+
+static void
+ftl_dev_free_init_ctx(struct ftl_dev_init_ctx *init_ctx)
+{
+ if (!init_ctx) {
+ return;
+ }
+
+ if (init_ctx->ioch) {
+ spdk_put_io_channel(init_ctx->ioch);
+ }
+
+ free(init_ctx);
+}
+
+static void
+ftl_call_init_complete_cb(void *ctx)
+{
+ struct ftl_dev_init_ctx *init_ctx = ctx;
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+
+ if (init_ctx->cb_fn != NULL) {
+ init_ctx->cb_fn(dev, init_ctx->cb_arg, 0);
+ }
+
+ ftl_dev_free_init_ctx(init_ctx);
+}
+
+static void
+ftl_init_complete(struct ftl_dev_init_ctx *init_ctx)
+{
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+
+ pthread_mutex_lock(&g_ftl_queue_lock);
+ STAILQ_INSERT_HEAD(&g_ftl_queue, dev, stailq);
+ pthread_mutex_unlock(&g_ftl_queue_lock);
+
+ dev->initialized = 1;
+
+ spdk_thread_send_msg(init_ctx->thread, ftl_call_init_complete_cb, init_ctx);
+}
+
+static void
+ftl_init_fail_cb(struct spdk_ftl_dev *dev, void *ctx, int status)
+{
+ struct ftl_dev_init_ctx *init_ctx = ctx;
+
+ if (init_ctx->cb_fn != NULL) {
+ init_ctx->cb_fn(NULL, init_ctx->cb_arg, -ENODEV);
+ }
+
+ ftl_dev_free_init_ctx(init_ctx);
+}
+
+static int ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
+ struct spdk_thread *thread);
+
+static void
+ftl_init_fail(struct ftl_dev_init_ctx *init_ctx)
+{
+ if (ftl_dev_free(init_ctx->dev, ftl_init_fail_cb, init_ctx, init_ctx->thread)) {
+ SPDK_ERRLOG("Unable to free the device\n");
+ assert(0);
+ }
+}
+
+static void
+ftl_write_nv_cache_md_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_dev_init_ctx *init_ctx = cb_arg;
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+
+ spdk_bdev_free_io(bdev_io);
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Writing non-volatile cache's metadata header failed\n");
+ ftl_init_fail(init_ctx);
+ return;
+ }
+
+ dev->nv_cache.ready = true;
+ ftl_init_complete(init_ctx);
+}
+
+static void
+ftl_clear_nv_cache_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_dev_init_ctx *init_ctx = cb_arg;
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+ struct ftl_nv_cache *nv_cache = &dev->nv_cache;
+
+ spdk_bdev_free_io(bdev_io);
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Unable to clear the non-volatile cache bdev\n");
+ ftl_init_fail(init_ctx);
+ return;
+ }
+
+ nv_cache->phase = 1;
+ if (ftl_nv_cache_write_header(nv_cache, false, ftl_write_nv_cache_md_cb, init_ctx)) {
+ SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
+ ftl_init_fail(init_ctx);
+ }
+}
+
+static void
+_ftl_nv_cache_scrub(void *ctx)
+{
+ struct ftl_dev_init_ctx *init_ctx = ctx;
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+ int rc;
+
+ rc = ftl_nv_cache_scrub(&dev->nv_cache, ftl_clear_nv_cache_cb, init_ctx);
+
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to clear the non-volatile cache bdev: %s\n",
+ spdk_strerror(-rc));
+ ftl_init_fail(init_ctx);
+ }
+}
+
+static int
+ftl_setup_initial_state(struct ftl_dev_init_ctx *init_ctx)
+{
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+ struct spdk_ftl_conf *conf = &dev->conf;
+ size_t i;
+
+ spdk_uuid_generate(&dev->uuid);
+
+ dev->num_lbas = 0;
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ dev->num_lbas += ftl_band_num_usable_blocks(&dev->bands[i]);
+ }
+
+ dev->num_lbas = (dev->num_lbas * (100 - conf->lba_rsvd)) / 100;
+
+ if (ftl_dev_l2p_alloc(dev)) {
+ SPDK_ERRLOG("Unable to init l2p table\n");
+ return -1;
+ }
+
+ if (ftl_init_bands_state(dev)) {
+ SPDK_ERRLOG("Unable to finish the initialization\n");
+ return -1;
+ }
+
+ if (!ftl_dev_has_nv_cache(dev)) {
+ ftl_init_complete(init_ctx);
+ } else {
+ spdk_thread_send_msg(ftl_get_core_thread(dev), _ftl_nv_cache_scrub, init_ctx);
+ }
+
+ return 0;
+}
+
+static void
+ftl_restore_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
+{
+ struct ftl_dev_init_ctx *init_ctx = cb_arg;
+
+ if (spdk_unlikely(status != 0)) {
+ SPDK_ERRLOG("Failed to restore the non-volatile cache state\n");
+ ftl_init_fail(init_ctx);
+ return;
+ }
+
+ ftl_init_complete(init_ctx);
+}
+
+static void
+ftl_restore_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
+{
+ struct ftl_dev_init_ctx *init_ctx = cb_arg;
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+
+ if (status) {
+ SPDK_ERRLOG("Failed to restore the device from the SSD\n");
+ ftl_init_fail(init_ctx);
+ return;
+ }
+
+ if (ftl_init_bands_state(dev)) {
+ SPDK_ERRLOG("Unable to finish the initialization\n");
+ ftl_init_fail(init_ctx);
+ return;
+ }
+
+ if (!ftl_dev_has_nv_cache(dev)) {
+ ftl_init_complete(init_ctx);
+ return;
+ }
+
+ ftl_restore_nv_cache(restore, ftl_restore_nv_cache_cb, init_ctx);
+}
+
+static void
+ftl_restore_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
+{
+ struct ftl_dev_init_ctx *init_ctx = cb_arg;
+
+ if (status) {
+ SPDK_ERRLOG("Failed to restore the metadata from the SSD\n");
+ goto error;
+ }
+
+ /* After the metadata is read it should be possible to allocate the L2P */
+ if (ftl_dev_l2p_alloc(init_ctx->dev)) {
+ SPDK_ERRLOG("Failed to allocate the L2P\n");
+ goto error;
+ }
+
+ if (ftl_restore_device(restore, ftl_restore_device_cb, init_ctx)) {
+ SPDK_ERRLOG("Failed to start device restoration from the SSD\n");
+ goto error;
+ }
+
+ return;
+error:
+ ftl_init_fail(init_ctx);
+}
+
+static int
+ftl_restore_state(struct ftl_dev_init_ctx *init_ctx)
+{
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+
+ dev->uuid = init_ctx->opts.uuid;
+
+ if (ftl_restore_md(dev, ftl_restore_md_cb, init_ctx)) {
+ SPDK_ERRLOG("Failed to start metadata restoration from the SSD\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+ftl_dev_update_bands(struct spdk_ftl_dev *dev)
+{
+ struct ftl_band *band, *temp_band;
+ size_t i;
+
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ band = &dev->bands[i];
+ band->tail_md_addr = ftl_band_tail_md_addr(band);
+ }
+
+ /* Remove band from shut_bands list to prevent further processing */
+ /* if all blocks on this band are bad */
+ LIST_FOREACH_SAFE(band, &dev->shut_bands, list_entry, temp_band) {
+ if (!band->num_zones) {
+ dev->num_bands--;
+ LIST_REMOVE(band, list_entry);
+ }
+ }
+}
+
+static void
+ftl_dev_init_state(struct ftl_dev_init_ctx *init_ctx)
+{
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+
+ ftl_dev_update_bands(dev);
+
+ if (ftl_dev_init_core_thread(dev, &init_ctx->opts)) {
+ SPDK_ERRLOG("Unable to initialize device thread\n");
+ ftl_init_fail(init_ctx);
+ return;
+ }
+
+ if (init_ctx->opts.mode & SPDK_FTL_MODE_CREATE) {
+ if (ftl_setup_initial_state(init_ctx)) {
+ SPDK_ERRLOG("Failed to setup initial state of the device\n");
+ ftl_init_fail(init_ctx);
+ return;
+ }
+ } else {
+ if (ftl_restore_state(init_ctx)) {
+ SPDK_ERRLOG("Unable to restore device's state from the SSD\n");
+ ftl_init_fail(init_ctx);
+ return;
+ }
+ }
+}
+
+static void ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx);
+
+static void
+ftl_dev_get_zone_info_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_dev_init_ctx *init_ctx = cb_arg;
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+ struct ftl_band *band;
+ struct ftl_zone *zone;
+ struct ftl_addr addr;
+ size_t i, zones_left, num_zones;
+
+ spdk_bdev_free_io(bdev_io);
+
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
+ ftl_init_fail(init_ctx);
+ return;
+ }
+
+ zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
+ num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);
+
+ for (i = 0; i < num_zones; ++i) {
+ addr.offset = init_ctx->info[i].zone_id;
+ band = &dev->bands[ftl_addr_get_band(dev, addr)];
+ zone = &band->zone_buf[ftl_addr_get_punit(dev, addr)];
+ zone->info = init_ctx->info[i];
+
+ /* TODO: add support for zone capacity less than zone size */
+ if (zone->info.capacity != ftl_get_num_blocks_in_zone(dev)) {
+ zone->info.state = SPDK_BDEV_ZONE_STATE_OFFLINE;
+			SPDK_ERRLOG("Zone capacity is not equal to zone size for "
+ "zone id: %"PRIu64"\n", init_ctx->zone_id);
+ }
+
+ /* Set write pointer to the last block plus one for zone in full state */
+ if (zone->info.state == SPDK_BDEV_ZONE_STATE_FULL) {
+ zone->info.write_pointer = zone->info.zone_id + zone->info.capacity;
+ }
+
+ if (zone->info.state != SPDK_BDEV_ZONE_STATE_OFFLINE) {
+ band->num_zones++;
+ CIRCLEQ_INSERT_TAIL(&band->zones, zone, circleq);
+ }
+ }
+
+ init_ctx->zone_id = init_ctx->zone_id + num_zones * ftl_get_num_blocks_in_zone(dev);
+
+ ftl_dev_get_zone_info(init_ctx);
+}
+
+static void
+ftl_dev_get_zone_info(struct ftl_dev_init_ctx *init_ctx)
+{
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+ size_t zones_left, num_zones;
+ int rc;
+
+ zones_left = ftl_get_num_zones(dev) - (init_ctx->zone_id / ftl_get_num_blocks_in_zone(dev));
+ if (zones_left == 0) {
+ ftl_dev_init_state(init_ctx);
+ return;
+ }
+
+ num_zones = spdk_min(zones_left, FTL_ZONE_INFO_COUNT);
+
+ rc = spdk_bdev_get_zone_info(dev->base_bdev_desc, init_ctx->ioch,
+ init_ctx->zone_id, num_zones, init_ctx->info,
+ ftl_dev_get_zone_info_cb, init_ctx);
+
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to read zone info for zone id: %"PRIu64"\n", init_ctx->zone_id);
+ ftl_init_fail(init_ctx);
+ }
+}
+
+static int
+ftl_dev_init_zones(struct ftl_dev_init_ctx *init_ctx)
+{
+ struct spdk_ftl_dev *dev = init_ctx->dev;
+
+ init_ctx->zone_id = 0;
+ init_ctx->ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
+ if (!init_ctx->ioch) {
+ SPDK_ERRLOG("Failed to get base bdev IO channel\n");
+ return -1;
+ }
+
+ ftl_dev_get_zone_info(init_ctx);
+
+ return 0;
+}
+
+struct _ftl_io_channel {
+ struct ftl_io_channel *ioch;
+};
+
+struct ftl_io_channel *
+ftl_io_channel_get_ctx(struct spdk_io_channel *ioch)
+{
+ struct _ftl_io_channel *_ioch = spdk_io_channel_get_ctx(ioch);
+
+ return _ioch->ioch;
+}
+
+static void
+ftl_io_channel_register(void *ctx)
+{
+ struct ftl_io_channel *ioch = ctx;
+ struct spdk_ftl_dev *dev = ioch->dev;
+ uint32_t ioch_index;
+
+ for (ioch_index = 0; ioch_index < dev->conf.max_io_channels; ++ioch_index) {
+ if (dev->ioch_array[ioch_index] == NULL) {
+ dev->ioch_array[ioch_index] = ioch;
+ ioch->index = ioch_index;
+ break;
+ }
+ }
+
+ assert(ioch_index < dev->conf.max_io_channels);
+ TAILQ_INSERT_TAIL(&dev->ioch_queue, ioch, tailq);
+}
+
+static int
+ftl_io_channel_init_wbuf(struct ftl_io_channel *ioch)
+{
+ struct spdk_ftl_dev *dev = ioch->dev;
+ struct ftl_wbuf_entry *entry;
+ uint32_t i;
+ int rc;
+
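+	/* One write buffer entry per block of the per-channel buffer; with the
+	 * default 6 MiB buffer and the usual 4 KiB FTL_BLOCK_SIZE this comes out
+	 * to 1536 entries.
+	 */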
+ ioch->num_entries = dev->conf.write_buffer_size / FTL_BLOCK_SIZE;
+ ioch->wbuf_entries = calloc(ioch->num_entries, sizeof(*ioch->wbuf_entries));
+ if (ioch->wbuf_entries == NULL) {
+ SPDK_ERRLOG("Failed to allocate write buffer entry array\n");
+ return -1;
+ }
+
+ ioch->qdepth_limit = ioch->num_entries;
+ ioch->wbuf_payload = spdk_zmalloc(dev->conf.write_buffer_size, FTL_BLOCK_SIZE, NULL,
+ SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
+ if (ioch->wbuf_payload == NULL) {
+ SPDK_ERRLOG("Failed to allocate write buffer payload\n");
+ goto error_entries;
+ }
+
+ ioch->free_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
+ spdk_align32pow2(ioch->num_entries + 1),
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (ioch->free_queue == NULL) {
+ SPDK_ERRLOG("Failed to allocate free queue\n");
+ goto error_payload;
+ }
+
+ ioch->submit_queue = spdk_ring_create(SPDK_RING_TYPE_SP_SC,
+ spdk_align32pow2(ioch->num_entries + 1),
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (ioch->submit_queue == NULL) {
+ SPDK_ERRLOG("Failed to allocate submit queue\n");
+ goto error_free_queue;
+ }
+
+ for (i = 0; i < ioch->num_entries; ++i) {
+ entry = &ioch->wbuf_entries[i];
+ entry->payload = (char *)ioch->wbuf_payload + i * FTL_BLOCK_SIZE;
+ entry->ioch = ioch;
+ entry->index = i;
+ entry->addr.offset = FTL_ADDR_INVALID;
+
+ rc = pthread_spin_init(&entry->lock, PTHREAD_PROCESS_PRIVATE);
+ if (rc != 0) {
+ SPDK_ERRLOG("Failed to initialize spinlock\n");
+ goto error_spinlock;
+ }
+
+ spdk_ring_enqueue(ioch->free_queue, (void **)&entry, 1, NULL);
+ }
+
+ return 0;
+error_spinlock:
+ for (; i > 0; --i) {
+ pthread_spin_destroy(&ioch->wbuf_entries[i - 1].lock);
+ }
+
+ spdk_ring_free(ioch->submit_queue);
+error_free_queue:
+ spdk_ring_free(ioch->free_queue);
+error_payload:
+ spdk_free(ioch->wbuf_payload);
+error_entries:
+ free(ioch->wbuf_entries);
+
+ return -1;
+}
+
+static int
+ftl_io_channel_create_cb(void *io_device, void *ctx)
+{
+ struct spdk_ftl_dev *dev = io_device;
+ struct _ftl_io_channel *_ioch = ctx;
+ struct ftl_io_channel *ioch;
+ uint32_t num_io_channels;
+ char mempool_name[32];
+ int rc;
+
+ num_io_channels = __atomic_fetch_add(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
+ if (num_io_channels >= dev->conf.max_io_channels) {
+ SPDK_ERRLOG("Reached maximum number of IO channels\n");
+ __atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
+ return -1;
+ }
+
+ ioch = calloc(1, sizeof(*ioch));
+ if (ioch == NULL) {
+ SPDK_ERRLOG("Failed to allocate IO channel\n");
+ return -1;
+ }
+
+ rc = snprintf(mempool_name, sizeof(mempool_name), "ftl_io_%p", ioch);
+ if (rc < 0 || rc >= (int)sizeof(mempool_name)) {
+ SPDK_ERRLOG("Failed to create IO channel pool name\n");
+ free(ioch);
+ return -1;
+ }
+
+ ioch->cache_ioch = NULL;
+ ioch->index = FTL_IO_CHANNEL_INDEX_INVALID;
+ ioch->dev = dev;
+ ioch->elem_size = sizeof(struct ftl_md_io);
+ ioch->io_pool = spdk_mempool_create(mempool_name,
+ dev->conf.user_io_pool_size,
+ ioch->elem_size,
+ 0,
+ SPDK_ENV_SOCKET_ID_ANY);
+ if (!ioch->io_pool) {
+ SPDK_ERRLOG("Failed to create IO channel's IO pool\n");
+ free(ioch);
+ return -1;
+ }
+
+ ioch->base_ioch = spdk_bdev_get_io_channel(dev->base_bdev_desc);
+ if (!ioch->base_ioch) {
+ SPDK_ERRLOG("Failed to create base bdev IO channel\n");
+ goto fail_ioch;
+ }
+
+ if (ftl_dev_has_nv_cache(dev)) {
+ ioch->cache_ioch = spdk_bdev_get_io_channel(dev->nv_cache.bdev_desc);
+ if (!ioch->cache_ioch) {
+ SPDK_ERRLOG("Failed to create cache IO channel\n");
+ goto fail_cache;
+ }
+ }
+
+ TAILQ_INIT(&ioch->write_cmpl_queue);
+ TAILQ_INIT(&ioch->retry_queue);
+ ioch->poller = SPDK_POLLER_REGISTER(ftl_io_channel_poll, ioch, 0);
+ if (!ioch->poller) {
+ SPDK_ERRLOG("Failed to register IO channel poller\n");
+ goto fail_poller;
+ }
+
+ if (ftl_io_channel_init_wbuf(ioch)) {
+ SPDK_ERRLOG("Failed to initialize IO channel's write buffer\n");
+ goto fail_wbuf;
+ }
+
+ _ioch->ioch = ioch;
+
+ spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_register, ioch);
+
+ return 0;
+fail_wbuf:
+ spdk_poller_unregister(&ioch->poller);
+fail_poller:
+ if (ioch->cache_ioch) {
+ spdk_put_io_channel(ioch->cache_ioch);
+ }
+fail_cache:
+ spdk_put_io_channel(ioch->base_ioch);
+fail_ioch:
+ spdk_mempool_free(ioch->io_pool);
+ free(ioch);
+
+ return -1;
+}
+
+static void
+ftl_io_channel_unregister(void *ctx)
+{
+ struct ftl_io_channel *ioch = ctx;
+ struct spdk_ftl_dev *dev = ioch->dev;
+ uint32_t i, num_io_channels __attribute__((unused));
+
+ assert(ioch->index < dev->conf.max_io_channels);
+ assert(dev->ioch_array[ioch->index] == ioch);
+
+ dev->ioch_array[ioch->index] = NULL;
+ TAILQ_REMOVE(&dev->ioch_queue, ioch, tailq);
+
+ num_io_channels = __atomic_fetch_sub(&dev->num_io_channels, 1, __ATOMIC_SEQ_CST);
+ assert(num_io_channels > 0);
+
+ for (i = 0; i < ioch->num_entries; ++i) {
+ pthread_spin_destroy(&ioch->wbuf_entries[i].lock);
+ }
+
+ spdk_mempool_free(ioch->io_pool);
+ spdk_ring_free(ioch->free_queue);
+ spdk_ring_free(ioch->submit_queue);
+ spdk_free(ioch->wbuf_payload);
+ free(ioch->wbuf_entries);
+ free(ioch);
+}
+
+static void
+_ftl_io_channel_destroy_cb(void *ctx)
+{
+ struct ftl_io_channel *ioch = ctx;
+ struct spdk_ftl_dev *dev = ioch->dev;
+ uint32_t i;
+
+ /* Do not destroy the channel if some of its entries are still in use */
+ if (spdk_ring_count(ioch->free_queue) != ioch->num_entries) {
+ spdk_thread_send_msg(spdk_get_thread(), _ftl_io_channel_destroy_cb, ctx);
+ return;
+ }
+
+ /* Evict all valid entries from cache */
+ for (i = 0; i < ioch->num_entries; ++i) {
+ ftl_evict_cache_entry(dev, &ioch->wbuf_entries[i]);
+ }
+
+ spdk_poller_unregister(&ioch->poller);
+
+ spdk_put_io_channel(ioch->base_ioch);
+ if (ioch->cache_ioch) {
+ spdk_put_io_channel(ioch->cache_ioch);
+ }
+
+ ioch->base_ioch = NULL;
+ ioch->cache_ioch = NULL;
+
+ spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_io_channel_unregister, ioch);
+}
+
+static void
+ftl_io_channel_destroy_cb(void *io_device, void *ctx)
+{
+ struct _ftl_io_channel *_ioch = ctx;
+ struct ftl_io_channel *ioch = _ioch->ioch;
+
+	/* Mark the IO channel as being flushed to force out any unwritten entries */
+ ioch->flush = true;
+
+ _ftl_io_channel_destroy_cb(ioch);
+}
+
+static int
+ftl_dev_init_io_channel(struct spdk_ftl_dev *dev)
+{
+ struct ftl_batch *batch;
+ uint32_t i;
+
+ /* Align the IO channels to nearest power of 2 to allow for easy addr bit shift */
+ dev->conf.max_io_channels = spdk_align32pow2(dev->conf.max_io_channels);
+ dev->ioch_shift = spdk_u32log2(dev->conf.max_io_channels);
+
+ dev->ioch_array = calloc(dev->conf.max_io_channels, sizeof(*dev->ioch_array));
+ if (!dev->ioch_array) {
+ SPDK_ERRLOG("Failed to allocate IO channel array\n");
+ return -1;
+ }
+
+ if (dev->md_size > 0) {
+ dev->md_buf = spdk_zmalloc(dev->md_size * dev->xfer_size * FTL_BATCH_COUNT,
+ dev->md_size, NULL, SPDK_ENV_LCORE_ID_ANY,
+ SPDK_MALLOC_DMA);
+ if (dev->md_buf == NULL) {
+ SPDK_ERRLOG("Failed to allocate metadata buffer\n");
+ return -1;
+ }
+ }
+
+ dev->iov_buf = calloc(FTL_BATCH_COUNT, dev->xfer_size * sizeof(struct iovec));
+ if (!dev->iov_buf) {
+ SPDK_ERRLOG("Failed to allocate iovec buffer\n");
+ return -1;
+ }
+
+ TAILQ_INIT(&dev->free_batches);
+ TAILQ_INIT(&dev->pending_batches);
+ TAILQ_INIT(&dev->ioch_queue);
+
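+	/* Each of the FTL_BATCH_COUNT batches gets an xfer_size-sized slice of the
+	 * shared buffers: batch i uses iov_buf[i * xfer_size] onwards for its
+	 * iovecs and, when metadata is present, md_buf + i * xfer_size * md_size.
+	 */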
+ for (i = 0; i < FTL_BATCH_COUNT; ++i) {
+ batch = &dev->batch_array[i];
+ batch->iov = &dev->iov_buf[i * dev->xfer_size];
+ batch->num_entries = 0;
+ batch->index = i;
+ TAILQ_INIT(&batch->entries);
+ if (dev->md_buf != NULL) {
+ batch->metadata = (char *)dev->md_buf + i * dev->xfer_size * dev->md_size;
+ }
+
+ TAILQ_INSERT_TAIL(&dev->free_batches, batch, tailq);
+ }
+
+ dev->num_io_channels = 0;
+
+ spdk_io_device_register(dev, ftl_io_channel_create_cb, ftl_io_channel_destroy_cb,
+ sizeof(struct _ftl_io_channel),
+ NULL);
+
+ return 0;
+}
+
+static int
+ftl_dev_init_base_bdev(struct spdk_ftl_dev *dev, const char *bdev_name)
+{
+ uint32_t block_size;
+ uint64_t num_blocks;
+ struct spdk_bdev *bdev;
+
+ bdev = spdk_bdev_get_by_name(bdev_name);
+ if (!bdev) {
+ SPDK_ERRLOG("Unable to find bdev: %s\n", bdev_name);
+ return -1;
+ }
+
+ if (!spdk_bdev_is_zoned(bdev)) {
+		SPDK_ERRLOG("Bdev doesn't support zone capabilities: %s\n",
+ spdk_bdev_get_name(bdev));
+ return -1;
+ }
+
+ if (spdk_bdev_open_ext(bdev_name, true, ftl_bdev_event_cb,
+ dev, &dev->base_bdev_desc)) {
+ SPDK_ERRLOG("Unable to open bdev: %s\n", bdev_name);
+ return -1;
+ }
+
+ if (spdk_bdev_module_claim_bdev(bdev, dev->base_bdev_desc, &g_ftl_bdev_module)) {
+ spdk_bdev_close(dev->base_bdev_desc);
+ dev->base_bdev_desc = NULL;
+ SPDK_ERRLOG("Unable to claim bdev %s\n", bdev_name);
+ return -1;
+ }
+
+ dev->xfer_size = spdk_bdev_get_write_unit_size(bdev);
+ dev->md_size = spdk_bdev_get_md_size(bdev);
+
+ block_size = spdk_bdev_get_block_size(bdev);
+ if (block_size != FTL_BLOCK_SIZE) {
+ SPDK_ERRLOG("Unsupported block size (%"PRIu32")\n", block_size);
+ return -1;
+ }
+
+ num_blocks = spdk_bdev_get_num_blocks(bdev);
+ if (num_blocks % ftl_get_num_punits(dev)) {
+		SPDK_ERRLOG("Unsupported geometry. Base bdev block count must be a multiple "
+			    "of the optimal number of open zones.\n");
+ return -1;
+ }
+
+ if (ftl_is_append_supported(dev) &&
+ !spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZONE_APPEND)) {
+		SPDK_ERRLOG("Bdev doesn't support append: %s\n",
+ spdk_bdev_get_name(bdev));
+ return -1;
+ }
+
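+	/* Example with a hypothetical zoned bdev: 8 optimal open zones, 65536-block
+	 * zones and 2^30 total blocks give 2048 bands and addr_len = 31, i.e.
+	 * packed 4-byte L2P entries.
+	 */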
+ dev->num_bands = num_blocks / (ftl_get_num_punits(dev) * ftl_get_num_blocks_in_zone(dev));
+ dev->addr_len = spdk_u64log2(num_blocks) + 1;
+
+ return 0;
+}
+
+static void
+ftl_lba_map_request_dtor(struct spdk_mempool *mp, void *opaque, void *obj, unsigned obj_idx)
+{
+ struct ftl_lba_map_request *request = obj;
+
+ spdk_bit_array_free(&request->segments);
+}
+
+static void
+ftl_release_bdev(struct spdk_bdev_desc *bdev_desc)
+{
+ if (!bdev_desc) {
+ return;
+ }
+
+ spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(bdev_desc));
+ spdk_bdev_close(bdev_desc);
+}
+
+static void
+ftl_dev_free_sync(struct spdk_ftl_dev *dev)
+{
+ struct spdk_ftl_dev *iter;
+ size_t i;
+
+ if (!dev) {
+ return;
+ }
+
+ pthread_mutex_lock(&g_ftl_queue_lock);
+ STAILQ_FOREACH(iter, &g_ftl_queue, stailq) {
+ if (iter == dev) {
+ STAILQ_REMOVE(&g_ftl_queue, dev, spdk_ftl_dev, stailq);
+ break;
+ }
+ }
+ pthread_mutex_unlock(&g_ftl_queue_lock);
+
+ assert(LIST_EMPTY(&dev->wptr_list));
+ assert(dev->current_batch == NULL);
+
+ ftl_dev_dump_bands(dev);
+ ftl_dev_dump_stats(dev);
+
+ if (dev->bands) {
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ free(dev->bands[i].zone_buf);
+ spdk_bit_array_free(&dev->bands[i].lba_map.vld);
+ spdk_bit_array_free(&dev->bands[i].reloc_bitmap);
+ }
+ }
+
+ spdk_dma_free(dev->nv_cache.dma_buf);
+
+ spdk_mempool_free(dev->lba_pool);
+ spdk_mempool_free(dev->nv_cache.md_pool);
+ spdk_mempool_free(dev->media_events_pool);
+ if (dev->lba_request_pool) {
+ spdk_mempool_obj_iter(dev->lba_request_pool, ftl_lba_map_request_dtor, NULL);
+ }
+ spdk_mempool_free(dev->lba_request_pool);
+
+ ftl_reloc_free(dev->reloc);
+
+ ftl_release_bdev(dev->nv_cache.bdev_desc);
+ ftl_release_bdev(dev->base_bdev_desc);
+
+ spdk_free(dev->md_buf);
+
+ assert(dev->num_io_channels == 0);
+ free(dev->ioch_array);
+ free(dev->iov_buf);
+ free(dev->name);
+ free(dev->bands);
+ if (dev->l2p_pmem_len != 0) {
+#ifdef SPDK_CONFIG_PMDK
+ pmem_unmap(dev->l2p, dev->l2p_pmem_len);
+#endif /* SPDK_CONFIG_PMDK */
+ } else {
+ free(dev->l2p);
+ }
+ free((char *)dev->conf.l2p_path);
+ free(dev);
+}
+
+int
+spdk_ftl_dev_init(const struct spdk_ftl_dev_init_opts *_opts, spdk_ftl_init_fn cb_fn, void *cb_arg)
+{
+ struct spdk_ftl_dev *dev;
+ struct spdk_ftl_dev_init_opts opts = *_opts;
+ struct ftl_dev_init_ctx *init_ctx = NULL;
+ int rc = -ENOMEM;
+
+ dev = calloc(1, sizeof(*dev));
+ if (!dev) {
+ return -ENOMEM;
+ }
+
+ init_ctx = calloc(1, sizeof(*init_ctx));
+ if (!init_ctx) {
+ goto fail_sync;
+ }
+
+ init_ctx->dev = dev;
+ init_ctx->opts = *_opts;
+ init_ctx->cb_fn = cb_fn;
+ init_ctx->cb_arg = cb_arg;
+ init_ctx->thread = spdk_get_thread();
+
+ if (!opts.conf) {
+ opts.conf = &g_default_conf;
+ }
+
+ if (!opts.base_bdev) {
+ SPDK_ERRLOG("Lack of underlying device in configuration\n");
+ rc = -EINVAL;
+ goto fail_sync;
+ }
+
+ dev->conf = *opts.conf;
+ dev->limit = SPDK_FTL_LIMIT_MAX;
+
+ dev->name = strdup(opts.name);
+ if (!dev->name) {
+ SPDK_ERRLOG("Unable to set device name\n");
+ goto fail_sync;
+ }
+
+ if (ftl_dev_init_base_bdev(dev, opts.base_bdev)) {
+ SPDK_ERRLOG("Unsupported underlying device\n");
+ goto fail_sync;
+ }
+
+ if (opts.conf->l2p_path) {
+ dev->conf.l2p_path = strdup(opts.conf->l2p_path);
+ if (!dev->conf.l2p_path) {
+ rc = -ENOMEM;
+ goto fail_sync;
+ }
+ }
+
+ /* In case of errors, we free all of the memory in ftl_dev_free_sync(), */
+ /* so we don't have to clean up in each of the init functions. */
+ if (ftl_check_conf(dev, opts.conf)) {
+ SPDK_ERRLOG("Invalid device configuration\n");
+ goto fail_sync;
+ }
+
+ if (ftl_init_lba_map_pools(dev)) {
+ SPDK_ERRLOG("Unable to init LBA map pools\n");
+ goto fail_sync;
+ }
+
+ if (ftl_init_media_events_pool(dev)) {
+ SPDK_ERRLOG("Unable to init media events pools\n");
+ goto fail_sync;
+ }
+
+ ftl_init_wptr_list(dev);
+
+ if (ftl_dev_init_bands(dev)) {
+ SPDK_ERRLOG("Unable to initialize band array\n");
+ goto fail_sync;
+ }
+
+ if (ftl_dev_init_nv_cache(dev, opts.cache_bdev)) {
+ SPDK_ERRLOG("Unable to initialize persistent cache\n");
+ goto fail_sync;
+ }
+
+ dev->reloc = ftl_reloc_init(dev);
+ if (!dev->reloc) {
+ SPDK_ERRLOG("Unable to initialize reloc structures\n");
+ goto fail_sync;
+ }
+
+ if (ftl_dev_init_io_channel(dev)) {
+ SPDK_ERRLOG("Unable to initialize IO channels\n");
+ goto fail_sync;
+ }
+
+ if (ftl_dev_init_zones(init_ctx)) {
+ SPDK_ERRLOG("Failed to initialize zones\n");
+ goto fail_async;
+ }
+
+ return 0;
+fail_sync:
+ ftl_dev_free_sync(dev);
+ ftl_dev_free_init_ctx(init_ctx);
+ return rc;
+fail_async:
+ ftl_init_fail(init_ctx);
+ return 0;
+}
+
+static void
+_ftl_halt_defrag(void *arg)
+{
+ ftl_reloc_halt(((struct spdk_ftl_dev *)arg)->reloc);
+}
+
+static void
+ftl_halt_complete_cb(void *ctx)
+{
+ struct ftl_dev_init_ctx *fini_ctx = ctx;
+ struct spdk_ftl_dev *dev = fini_ctx->dev;
+
+ /* Make sure core IO channel has already been released */
+ if (dev->num_io_channels > 0) {
+ spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
+ return;
+ }
+
+ spdk_io_device_unregister(fini_ctx->dev, NULL);
+
+ ftl_dev_free_sync(fini_ctx->dev);
+ if (fini_ctx->cb_fn != NULL) {
+ fini_ctx->cb_fn(NULL, fini_ctx->cb_arg, fini_ctx->halt_complete_status);
+ }
+
+ ftl_dev_free_init_ctx(fini_ctx);
+}
+
+static void
+ftl_put_io_channel_cb(void *ctx)
+{
+ struct ftl_dev_init_ctx *fini_ctx = ctx;
+ struct spdk_ftl_dev *dev = fini_ctx->dev;
+
+ spdk_put_io_channel(dev->ioch);
+ spdk_thread_send_msg(spdk_get_thread(), ftl_halt_complete_cb, ctx);
+}
+
+static void
+ftl_nv_cache_header_fini_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_dev_init_ctx *fini_ctx = cb_arg;
+ int rc = 0;
+
+ spdk_bdev_free_io(bdev_io);
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Failed to write non-volatile cache metadata header\n");
+ rc = -EIO;
+ }
+
+ fini_ctx->halt_complete_status = rc;
+ spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
+}
+
+static int
+ftl_halt_poller(void *ctx)
+{
+ struct ftl_dev_init_ctx *fini_ctx = ctx;
+ struct spdk_ftl_dev *dev = fini_ctx->dev;
+
+ if (!dev->core_poller) {
+ spdk_poller_unregister(&fini_ctx->poller);
+
+ if (ftl_dev_has_nv_cache(dev)) {
+ ftl_nv_cache_write_header(&dev->nv_cache, true,
+ ftl_nv_cache_header_fini_cb, fini_ctx);
+ } else {
+ fini_ctx->halt_complete_status = 0;
+ spdk_thread_send_msg(fini_ctx->thread, ftl_put_io_channel_cb, fini_ctx);
+ }
+ }
+
+ return SPDK_POLLER_BUSY;
+}
+
+static void
+ftl_add_halt_poller(void *ctx)
+{
+ struct ftl_dev_init_ctx *fini_ctx = ctx;
+ struct spdk_ftl_dev *dev = fini_ctx->dev;
+
+ dev->halt = 1;
+
+ _ftl_halt_defrag(dev);
+
+ assert(!fini_ctx->poller);
+ fini_ctx->poller = SPDK_POLLER_REGISTER(ftl_halt_poller, fini_ctx, 100);
+}
+
+static int
+ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg,
+ struct spdk_thread *thread)
+{
+ struct ftl_dev_init_ctx *fini_ctx;
+
+	if (dev->halt_started) {
+		return -EBUSY;
+	}
+	dev->halt_started = true;
+
+ fini_ctx = calloc(1, sizeof(*fini_ctx));
+ if (!fini_ctx) {
+ return -ENOMEM;
+ }
+
+ fini_ctx->dev = dev;
+ fini_ctx->cb_fn = cb_fn;
+ fini_ctx->cb_arg = cb_arg;
+ fini_ctx->thread = thread;
+
+ spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_add_halt_poller, fini_ctx);
+ return 0;
+}
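+
+/*
+ * Example (sketch, hypothetical caller): tearing the device down from an SPDK thread. The
+ * call only schedules the shutdown; cb_fn runs on the calling thread once the halt poller
+ * has drained outstanding work and the device has been freed.
+ *
+ *	static void
+ *	free_done(struct spdk_ftl_dev *dev, void *cb_arg, int status)
+ *	{
+ *		SPDK_NOTICELOG("FTL device freed, status: %d\n", status);
+ *	}
+ *
+ *	if (spdk_ftl_dev_free(dev, free_done, NULL) != 0) {
+ *		... handle the immediate error (-EBUSY or -ENOMEM) ...
+ *	}
+ */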
+
+int
+spdk_ftl_dev_free(struct spdk_ftl_dev *dev, spdk_ftl_init_fn cb_fn, void *cb_arg)
+{
+ return ftl_dev_free(dev, cb_fn, cb_arg, spdk_get_thread());
+}
+
+SPDK_LOG_REGISTER_COMPONENT("ftl_init", SPDK_LOG_FTL_INIT)
diff --git a/src/spdk/lib/ftl/ftl_io.c b/src/spdk/lib/ftl/ftl_io.c
new file mode 100644
index 000000000..39a845bae
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_io.c
@@ -0,0 +1,563 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/ftl.h"
+#include "spdk/likely.h"
+#include "spdk/util.h"
+
+#include "ftl_io.h"
+#include "ftl_core.h"
+#include "ftl_band.h"
+#include "ftl_debug.h"
+
+void
+ftl_io_inc_req(struct ftl_io *io)
+{
+ struct ftl_band *band = io->band;
+
+ if (!(io->flags & FTL_IO_CACHE) && io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) {
+ ftl_band_acquire_lba_map(band);
+ }
+
+ __atomic_fetch_add(&io->dev->num_inflight, 1, __ATOMIC_SEQ_CST);
+
+ ++io->req_cnt;
+}
+
+void
+ftl_io_dec_req(struct ftl_io *io)
+{
+ struct ftl_band *band = io->band;
+ unsigned long num_inflight __attribute__((unused));
+
+ if (!(io->flags & FTL_IO_CACHE) && io->type != FTL_IO_READ && io->type != FTL_IO_ERASE) {
+ ftl_band_release_lba_map(band);
+ }
+
+ num_inflight = __atomic_fetch_sub(&io->dev->num_inflight, 1, __ATOMIC_SEQ_CST);
+
+ assert(num_inflight > 0);
+ assert(io->req_cnt > 0);
+
+ --io->req_cnt;
+}
+
+struct iovec *
+ftl_io_iovec(struct ftl_io *io)
+{
+ return &io->iov[0];
+}
+
+uint64_t
+ftl_io_get_lba(const struct ftl_io *io, size_t offset)
+{
+ assert(offset < io->num_blocks);
+
+ if (io->flags & FTL_IO_VECTOR_LBA) {
+ return io->lba.vector[offset];
+ } else {
+ return io->lba.single + offset;
+ }
+}
+
+uint64_t
+ftl_io_current_lba(const struct ftl_io *io)
+{
+ return ftl_io_get_lba(io, io->pos);
+}
+
+void
+ftl_io_advance(struct ftl_io *io, size_t num_blocks)
+{
+ struct iovec *iov = ftl_io_iovec(io);
+ size_t iov_blocks, block_left = num_blocks;
+
+ io->pos += num_blocks;
+
+ if (io->iov_cnt != 0) {
+ while (block_left > 0) {
+ assert(io->iov_pos < io->iov_cnt);
+ iov_blocks = iov[io->iov_pos].iov_len / FTL_BLOCK_SIZE;
+
+ if (io->iov_off + block_left < iov_blocks) {
+ io->iov_off += block_left;
+ break;
+ }
+
+ assert(iov_blocks > io->iov_off);
+ block_left -= (iov_blocks - io->iov_off);
+ io->iov_off = 0;
+ io->iov_pos++;
+ }
+ }
+
+ if (io->parent) {
+ ftl_io_advance(io->parent, num_blocks);
+ }
+}
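+
+/*
+ * Worked example: for an IO with two iovecs of four blocks each, advancing a fresh IO by
+ * five blocks leaves pos == 5, iov_pos == 1 and iov_off == 1, i.e. the next transfer starts
+ * one block into the second iovec. Parent IOs are advanced together with their children so
+ * that their position reflects the overall progress.
+ */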
+
+size_t
+ftl_iovec_num_blocks(struct iovec *iov, size_t iov_cnt)
+{
+ size_t num_blocks = 0, i = 0;
+
+ for (; i < iov_cnt; ++i) {
+ num_blocks += iov[i].iov_len / FTL_BLOCK_SIZE;
+ }
+
+ return num_blocks;
+}
+
+void *
+ftl_io_iovec_addr(struct ftl_io *io)
+{
+ assert(io->iov_pos < io->iov_cnt);
+ assert(io->iov_off * FTL_BLOCK_SIZE < ftl_io_iovec(io)[io->iov_pos].iov_len);
+
+ return (char *)ftl_io_iovec(io)[io->iov_pos].iov_base +
+ io->iov_off * FTL_BLOCK_SIZE;
+}
+
+size_t
+ftl_io_iovec_len_left(struct ftl_io *io)
+{
+ struct iovec *iov = ftl_io_iovec(io);
+ return iov[io->iov_pos].iov_len / FTL_BLOCK_SIZE - io->iov_off;
+}
+
+static void
+ftl_io_init_iovec(struct ftl_io *io, const struct iovec *iov, size_t iov_cnt, size_t iov_off,
+ size_t num_blocks)
+{
+ size_t offset = 0, num_left;
+
+ io->iov_pos = 0;
+ io->iov_cnt = 0;
+ io->num_blocks = num_blocks;
+
+ while (offset < num_blocks) {
+ assert(io->iov_cnt < FTL_IO_MAX_IOVEC && io->iov_cnt < iov_cnt);
+
+ num_left = spdk_min(iov[io->iov_cnt].iov_len / FTL_BLOCK_SIZE - iov_off,
+ num_blocks);
+ io->iov[io->iov_cnt].iov_base = (char *)iov[io->iov_cnt].iov_base +
+ iov_off * FTL_BLOCK_SIZE;
+ io->iov[io->iov_cnt].iov_len = num_left * FTL_BLOCK_SIZE;
+
+ offset += num_left;
+ io->iov_cnt++;
+ iov_off = 0;
+ }
+}
+
+void
+ftl_io_shrink_iovec(struct ftl_io *io, size_t num_blocks)
+{
+ size_t iov_off = 0, block_off = 0;
+
+ assert(io->num_blocks >= num_blocks);
+ assert(io->pos == 0 && io->iov_pos == 0 && io->iov_off == 0);
+
+ for (; iov_off < io->iov_cnt; ++iov_off) {
+ size_t num_iov = io->iov[iov_off].iov_len / FTL_BLOCK_SIZE;
+ size_t num_left = num_blocks - block_off;
+
+ if (num_iov >= num_left) {
+ io->iov[iov_off].iov_len = num_left * FTL_BLOCK_SIZE;
+ io->iov_cnt = iov_off + 1;
+ io->num_blocks = num_blocks;
+ break;
+ }
+
+ block_off += num_iov;
+ }
+}
+
+static void
+ftl_io_init(struct ftl_io *io, struct spdk_ftl_dev *dev,
+ ftl_io_fn fn, void *ctx, int flags, int type)
+{
+ io->flags |= flags | FTL_IO_INITIALIZED;
+ io->type = type;
+ io->dev = dev;
+ io->lba.single = FTL_LBA_INVALID;
+ io->addr.offset = FTL_ADDR_INVALID;
+ io->cb_fn = fn;
+ io->cb_ctx = ctx;
+ io->trace = ftl_trace_alloc_id(dev);
+}
+
+struct ftl_io *
+ftl_io_init_internal(const struct ftl_io_init_opts *opts)
+{
+ struct ftl_io *io = opts->io;
+ struct ftl_io *parent = opts->parent;
+ struct spdk_ftl_dev *dev = opts->dev;
+ const struct iovec *iov;
+ size_t iov_cnt, iov_off;
+
+ if (!io) {
+ if (parent) {
+ io = ftl_io_alloc_child(parent);
+ } else {
+ io = ftl_io_alloc(ftl_get_io_channel(dev));
+ }
+
+ if (!io) {
+ return NULL;
+ }
+ }
+
+ ftl_io_clear(io);
+ ftl_io_init(io, dev, opts->cb_fn, opts->cb_ctx, opts->flags | FTL_IO_INTERNAL, opts->type);
+
+ io->batch = opts->batch;
+ io->band = opts->band;
+ io->md = opts->md;
+ io->iov = &io->iov_buf[0];
+
+ if (parent) {
+ if (parent->flags & FTL_IO_VECTOR_LBA) {
+ io->lba.vector = parent->lba.vector + parent->pos;
+ } else {
+ io->lba.single = parent->lba.single + parent->pos;
+ }
+
+ iov = &parent->iov[parent->iov_pos];
+ iov_cnt = parent->iov_cnt - parent->iov_pos;
+ iov_off = parent->iov_off;
+ } else {
+ iov = &opts->iovs[0];
+ iov_cnt = opts->iovcnt;
+ iov_off = 0;
+ }
+
+ /* Some requests (zone resets) do not use iovecs */
+ if (iov_cnt > 0) {
+ ftl_io_init_iovec(io, iov, iov_cnt, iov_off, opts->num_blocks);
+ }
+
+ if (opts->flags & FTL_IO_VECTOR_LBA) {
+ io->lba.vector = calloc(io->num_blocks, sizeof(uint64_t));
+ if (!io->lba.vector) {
+ ftl_io_free(io);
+ return NULL;
+ }
+ }
+
+ return io;
+}
+
+struct ftl_io *
+ftl_io_wbuf_init(struct spdk_ftl_dev *dev, struct ftl_addr addr, struct ftl_band *band,
+ struct ftl_batch *batch, ftl_io_fn cb)
+{
+ struct ftl_io *io;
+ struct ftl_io_init_opts opts = {
+ .dev = dev,
+ .io = NULL,
+ .batch = batch,
+ .band = band,
+ .size = sizeof(struct ftl_io),
+ .flags = 0,
+ .type = FTL_IO_WRITE,
+ .num_blocks = dev->xfer_size,
+ .cb_fn = cb,
+ .iovcnt = dev->xfer_size,
+ .md = batch->metadata,
+ };
+
+ memcpy(opts.iovs, batch->iov, sizeof(struct iovec) * dev->xfer_size);
+
+ io = ftl_io_init_internal(&opts);
+ if (!io) {
+ return NULL;
+ }
+
+ io->addr = addr;
+
+ return io;
+}
+
+struct ftl_io *
+ftl_io_erase_init(struct ftl_band *band, size_t num_blocks, ftl_io_fn cb)
+{
+ struct ftl_io *io;
+ struct ftl_io_init_opts opts = {
+ .dev = band->dev,
+ .io = NULL,
+ .band = band,
+ .size = sizeof(struct ftl_io),
+ .flags = FTL_IO_PHYSICAL_MODE,
+ .type = FTL_IO_ERASE,
+ .num_blocks = 1,
+ .cb_fn = cb,
+ .iovcnt = 0,
+ .md = NULL,
+ };
+
+ io = ftl_io_init_internal(&opts);
+ if (!io) {
+ return NULL;
+ }
+
+ io->num_blocks = num_blocks;
+
+ return io;
+}
+
+static void
+_ftl_user_cb(struct ftl_io *io, void *arg, int status)
+{
+ io->user_fn(arg, status);
+}
+
+struct ftl_io *
+ftl_io_user_init(struct spdk_io_channel *_ioch, uint64_t lba, size_t num_blocks, struct iovec *iov,
+ size_t iov_cnt, spdk_ftl_fn cb_fn, void *cb_ctx, int type)
+{
+ struct ftl_io_channel *ioch = ftl_io_channel_get_ctx(_ioch);
+ struct spdk_ftl_dev *dev = ioch->dev;
+ struct ftl_io *io;
+
+ io = ftl_io_alloc(_ioch);
+ if (spdk_unlikely(!io)) {
+ return NULL;
+ }
+
+ ftl_io_init(io, dev, _ftl_user_cb, cb_ctx, 0, type);
+ io->lba.single = lba;
+ io->user_fn = cb_fn;
+ io->iov = iov;
+ io->iov_cnt = iov_cnt;
+ io->num_blocks = num_blocks;
+
+ ftl_trace_lba_io_init(io->dev, io);
+ return io;
+}
+
+static void
+_ftl_io_free(struct ftl_io *io)
+{
+ struct ftl_io_channel *ioch;
+
+ assert(LIST_EMPTY(&io->children));
+
+ if (io->flags & FTL_IO_VECTOR_LBA) {
+ free(io->lba.vector);
+ }
+
+ if (pthread_spin_destroy(&io->lock)) {
+ SPDK_ERRLOG("pthread_spin_destroy failed\n");
+ }
+
+ ioch = ftl_io_channel_get_ctx(io->ioch);
+ spdk_mempool_put(ioch->io_pool, io);
+}
+
+static bool
+ftl_io_remove_child(struct ftl_io *io)
+{
+ struct ftl_io *parent = io->parent;
+ bool parent_done;
+
+ pthread_spin_lock(&parent->lock);
+ LIST_REMOVE(io, child_entry);
+ parent_done = parent->done && LIST_EMPTY(&parent->children);
+ parent->status = parent->status ? : io->status;
+ pthread_spin_unlock(&parent->lock);
+
+ return parent_done;
+}
+
+void
+ftl_io_complete(struct ftl_io *io)
+{
+ struct ftl_io *parent = io->parent;
+ bool complete;
+
+ io->flags &= ~FTL_IO_INITIALIZED;
+
+ pthread_spin_lock(&io->lock);
+ complete = LIST_EMPTY(&io->children);
+ io->done = true;
+ pthread_spin_unlock(&io->lock);
+
+ if (complete) {
+ if (io->cb_fn) {
+ io->cb_fn(io, io->cb_ctx, io->status);
+ }
+
+ if (parent && ftl_io_remove_child(io)) {
+ ftl_io_complete(parent);
+ }
+
+ _ftl_io_free(io);
+ }
+}
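+
+/*
+ * Note: an IO is completed only after it has been marked done and all of its children have
+ * completed. The parent keeps the first non-zero status reported by any of its children (see
+ * ftl_io_remove_child()), and completing the last outstanding child triggers completion of
+ * the parent itself.
+ */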
+
+struct ftl_io *
+ftl_io_alloc_child(struct ftl_io *parent)
+{
+ struct ftl_io *io;
+
+ io = ftl_io_alloc(parent->ioch);
+ if (spdk_unlikely(!io)) {
+ return NULL;
+ }
+
+ ftl_io_init(io, parent->dev, NULL, NULL, parent->flags, parent->type);
+ io->parent = parent;
+
+ pthread_spin_lock(&parent->lock);
+ LIST_INSERT_HEAD(&parent->children, io, child_entry);
+ pthread_spin_unlock(&parent->lock);
+
+ return io;
+}
+
+void ftl_io_fail(struct ftl_io *io, int status)
+{
+ io->status = status;
+ ftl_io_advance(io, io->num_blocks - io->pos);
+}
+
+void *
+ftl_io_get_md(const struct ftl_io *io)
+{
+ if (!io->md) {
+ return NULL;
+ }
+
+ return (char *)io->md + io->pos * io->dev->md_size;
+}
+
+struct ftl_io *
+ftl_io_alloc(struct spdk_io_channel *ch)
+{
+ struct ftl_io *io;
+ struct ftl_io_channel *ioch = ftl_io_channel_get_ctx(ch);
+
+ io = spdk_mempool_get(ioch->io_pool);
+ if (!io) {
+ return NULL;
+ }
+
+ memset(io, 0, ioch->elem_size);
+ io->ioch = ch;
+
+ if (pthread_spin_init(&io->lock, PTHREAD_PROCESS_PRIVATE)) {
+ SPDK_ERRLOG("pthread_spin_init failed\n");
+ spdk_mempool_put(ioch->io_pool, io);
+ return NULL;
+ }
+
+ return io;
+}
+
+void
+ftl_io_reinit(struct ftl_io *io, ftl_io_fn cb, void *ctx, int flags, int type)
+{
+ ftl_io_clear(io);
+ ftl_io_init(io, io->dev, cb, ctx, flags, type);
+}
+
+void
+ftl_io_clear(struct ftl_io *io)
+{
+ ftl_io_reset(io);
+
+ io->flags = 0;
+ io->batch = NULL;
+ io->band = NULL;
+}
+
+void
+ftl_io_reset(struct ftl_io *io)
+{
+ io->req_cnt = io->pos = io->iov_pos = io->iov_off = 0;
+ io->done = false;
+}
+
+void
+ftl_io_free(struct ftl_io *io)
+{
+ struct ftl_io *parent;
+
+ if (!io) {
+ return;
+ }
+
+ parent = io->parent;
+ if (parent && ftl_io_remove_child(io)) {
+ ftl_io_complete(parent);
+ }
+
+ _ftl_io_free(io);
+}
+
+void
+ftl_io_call_foreach_child(struct ftl_io *io, int (*callback)(struct ftl_io *))
+{
+ struct ftl_io *child, *tmp;
+
+ assert(!io->done);
+
+ /*
+ * If the IO doesn't have any children, it means that it directly describes a request (i.e.
+ * all of the buffers, LBAs, etc. are filled). Otherwise the IO only groups together several
+ * requests and may be partially filled, so the callback needs to be called on all of its
+ * children instead.
+ */
+ if (LIST_EMPTY(&io->children)) {
+ callback(io);
+ return;
+ }
+
+ LIST_FOREACH_SAFE(child, &io->children, child_entry, tmp) {
+ int rc = callback(child);
+ if (rc) {
+ assert(rc != -EAGAIN);
+ ftl_io_fail(io, rc);
+ break;
+ }
+ }
+
+ /*
+ * If all the callbacks were processed or an error occurred, treat this IO as completed.
+ * Multiple calls to ftl_io_call_foreach_child are not supported, resubmissions are supposed
+ * to be handled in the callback.
+ */
+ ftl_io_complete(io);
+}
diff --git a/src/spdk/lib/ftl/ftl_io.h b/src/spdk/lib/ftl/ftl_io.h
new file mode 100644
index 000000000..d49dc3de7
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_io.h
@@ -0,0 +1,351 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FTL_IO_H
+#define FTL_IO_H
+
+#include "spdk/stdinc.h"
+#include "spdk/nvme.h"
+#include "spdk/ftl.h"
+
+#include "ftl_addr.h"
+#include "ftl_trace.h"
+
+struct spdk_ftl_dev;
+struct ftl_band;
+struct ftl_batch;
+struct ftl_io;
+
+typedef int (*ftl_md_pack_fn)(struct ftl_band *);
+typedef void (*ftl_io_fn)(struct ftl_io *, void *, int);
+
+/* IO flags */
+enum ftl_io_flags {
+ /* Indicates whether IO is already initialized */
+ FTL_IO_INITIALIZED = (1 << 0),
+	/* Internally generated IO (defrag, metadata, etc.) */
+ FTL_IO_INTERNAL = (1 << 1),
+ /* Indicates that the IO should not go through if there's */
+ /* already another one scheduled to the same LBA */
+ FTL_IO_WEAK = (1 << 2),
+ /* Indicates that the IO is used for padding */
+ FTL_IO_PAD = (1 << 3),
+ /* The IO operates on metadata */
+ FTL_IO_MD = (1 << 4),
+ /* Using physical instead of logical address */
+ FTL_IO_PHYSICAL_MODE = (1 << 5),
+ /* Indicates that IO contains noncontiguous LBAs */
+ FTL_IO_VECTOR_LBA = (1 << 6),
+ /* The IO is directed to non-volatile cache */
+ FTL_IO_CACHE = (1 << 7),
+	/* Indicates that the physical address should be taken from the IO struct */
+	/* instead of being assigned by the wptr; only works if the wptr is also in direct mode */
+ FTL_IO_DIRECT_ACCESS = (1 << 8),
+ /* Bypass the non-volatile cache */
+ FTL_IO_BYPASS_CACHE = (1 << 9),
+};
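+
+/*
+ * The flags above are combined with bitwise OR. For example, relocation writes in this
+ * library use FTL_IO_INTERNAL | FTL_IO_PHYSICAL_MODE together with FTL_IO_WEAK,
+ * FTL_IO_VECTOR_LBA and FTL_IO_BYPASS_CACHE (see ftl_reloc_write()), while user IO is
+ * submitted with no flags at all (see ftl_io_user_init()).
+ */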
+
+enum ftl_io_type {
+ FTL_IO_READ,
+ FTL_IO_WRITE,
+ FTL_IO_ERASE,
+};
+
+#define FTL_IO_MAX_IOVEC 64
+
+struct ftl_io_init_opts {
+ struct spdk_ftl_dev *dev;
+
+ /* IO descriptor */
+ struct ftl_io *io;
+
+ /* Parent request */
+ struct ftl_io *parent;
+
+ /* Size of IO descriptor */
+ size_t size;
+
+ /* IO flags */
+ int flags;
+
+ /* IO type */
+ enum ftl_io_type type;
+
+ /* Transfer batch, set for IO going through the write buffer */
+ struct ftl_batch *batch;
+
+ /* Band to which the IO is directed */
+ struct ftl_band *band;
+
+ /* Number of logical blocks */
+ size_t num_blocks;
+
+ /* Data */
+ struct iovec iovs[FTL_IO_MAX_IOVEC];
+ int iovcnt;
+
+ /* Metadata */
+ void *md;
+
+	/* Callback function */
+	ftl_io_fn cb_fn;
+
+	/* Callback context */
+	void *cb_ctx;
+};
+
+struct ftl_io_channel;
+
+struct ftl_wbuf_entry {
+	/* IO channel that owns the write buffer entry */
+ struct ftl_io_channel *ioch;
+ /* Data payload (single block) */
+ void *payload;
+ /* Index within the IO channel's wbuf_entries array */
+ uint32_t index;
+ uint32_t io_flags;
+ /* Points at the band the data is copied from. Only valid for internal
+ * requests coming from reloc.
+ */
+ struct ftl_band *band;
+ /* Physical address of that particular block. Valid once the data has
+ * been written out.
+ */
+ struct ftl_addr addr;
+ /* Logical block address */
+ uint64_t lba;
+
+ /* Trace ID of the requests the entry is part of */
+ uint64_t trace;
+
+ /* Indicates that the entry was written out and is still present in the
+ * L2P table.
+ */
+ bool valid;
+ /* Lock that protects the entry from being evicted from the L2P */
+ pthread_spinlock_t lock;
+ TAILQ_ENTRY(ftl_wbuf_entry) tailq;
+};
+
+#define FTL_IO_CHANNEL_INDEX_INVALID ((uint64_t)-1)
+
+struct ftl_io_channel {
+ /* Device */
+ struct spdk_ftl_dev *dev;
+ /* IO pool element size */
+ size_t elem_size;
+ /* Index within the IO channel array */
+ uint64_t index;
+ /* IO pool */
+ struct spdk_mempool *io_pool;
+ /* Underlying device IO channel */
+ struct spdk_io_channel *base_ioch;
+ /* Persistent cache IO channel */
+ struct spdk_io_channel *cache_ioch;
+ /* Poller used for completing write requests and retrying IO */
+ struct spdk_poller *poller;
+ /* Write completion queue */
+ TAILQ_HEAD(, ftl_io) write_cmpl_queue;
+ TAILQ_HEAD(, ftl_io) retry_queue;
+ TAILQ_ENTRY(ftl_io_channel) tailq;
+
+ /* Array of write buffer entries */
+ struct ftl_wbuf_entry *wbuf_entries;
+ /* Write buffer data payload */
+ void *wbuf_payload;
+ /* Number of write buffer entries */
+ uint32_t num_entries;
+ /* Write buffer queues */
+ struct spdk_ring *free_queue;
+ struct spdk_ring *submit_queue;
+ /* Maximum number of concurrent user writes */
+ uint32_t qdepth_limit;
+ /* Current number of concurrent user writes */
+ uint32_t qdepth_current;
+ /* Means that the IO channel is being flushed */
+ bool flush;
+};
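+
+/*
+ * Rough lifecycle of a write buffer entry, as far as this header is concerned: entries are
+ * taken from free_queue, filled with a single block of data, pushed onto submit_queue and,
+ * once the data has been persisted, returned to free_queue. The per-channel poller drives
+ * write completion and IO retries.
+ */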
+
+/* General IO descriptor */
+struct ftl_io {
+ /* Device */
+ struct spdk_ftl_dev *dev;
+
+ /* IO channel */
+ struct spdk_io_channel *ioch;
+
+ union {
+ /* LBA table */
+ uint64_t *vector;
+
+ /* First LBA */
+ uint64_t single;
+ } lba;
+
+ /* First block address */
+ struct ftl_addr addr;
+
+ /* Number of processed blocks */
+ size_t pos;
+
+ /* Number of blocks */
+ size_t num_blocks;
+
+ /* IO vector pointer */
+ struct iovec *iov;
+
+ /* IO vector buffer for internal requests */
+ struct iovec iov_buf[FTL_IO_MAX_IOVEC];
+
+ /* Metadata */
+ void *md;
+
+ /* Number of IO vectors */
+ size_t iov_cnt;
+
+ /* Position within the iovec */
+ size_t iov_pos;
+
+ /* Offset within the iovec (in blocks) */
+ size_t iov_off;
+
+ /* Transfer batch (valid only for writes going through the write buffer) */
+ struct ftl_batch *batch;
+
+ /* Band this IO is being written to */
+ struct ftl_band *band;
+
+ /* Request status */
+ int status;
+
+ /* Number of split requests */
+ size_t req_cnt;
+
+	/* Callback function */
+	ftl_io_fn cb_fn;
+
+	/* Callback context */
+	void *cb_ctx;
+
+ /* User callback function */
+ spdk_ftl_fn user_fn;
+
+ /* Flags */
+ int flags;
+
+ /* IO type */
+ enum ftl_io_type type;
+
+ /* Done flag */
+ bool done;
+
+ /* Parent request */
+ struct ftl_io *parent;
+ /* Child requests list */
+ LIST_HEAD(, ftl_io) children;
+ /* Child list link */
+ LIST_ENTRY(ftl_io) child_entry;
+ /* Children lock */
+ pthread_spinlock_t lock;
+
+ /* Trace group id */
+ uint64_t trace;
+
+ /* Used by retry and write completion queues */
+ TAILQ_ENTRY(ftl_io) ioch_entry;
+};
+
+/* Metadata IO */
+struct ftl_md_io {
+ /* Parent IO structure */
+ struct ftl_io io;
+
+ /* Serialization/deserialization callback */
+ ftl_md_pack_fn pack_fn;
+
+	/* Callback function */
+	ftl_io_fn cb_fn;
+
+	/* Callback context */
+	void *cb_ctx;
+};
+
+static inline bool
+ftl_io_mode_physical(const struct ftl_io *io)
+{
+ return io->flags & FTL_IO_PHYSICAL_MODE;
+}
+
+static inline bool
+ftl_io_mode_logical(const struct ftl_io *io)
+{
+ return !ftl_io_mode_physical(io);
+}
+
+static inline bool
+ftl_io_done(const struct ftl_io *io)
+{
+ return io->req_cnt == 0 && io->pos == io->num_blocks;
+}
+
+struct ftl_io *ftl_io_alloc(struct spdk_io_channel *ch);
+struct ftl_io *ftl_io_alloc_child(struct ftl_io *parent);
+void ftl_io_fail(struct ftl_io *io, int status);
+void ftl_io_free(struct ftl_io *io);
+struct ftl_io *ftl_io_init_internal(const struct ftl_io_init_opts *opts);
+void ftl_io_reinit(struct ftl_io *io, ftl_io_fn cb,
+ void *ctx, int flags, int type);
+void ftl_io_clear(struct ftl_io *io);
+void ftl_io_inc_req(struct ftl_io *io);
+void ftl_io_dec_req(struct ftl_io *io);
+struct iovec *ftl_io_iovec(struct ftl_io *io);
+uint64_t ftl_io_current_lba(const struct ftl_io *io);
+uint64_t ftl_io_get_lba(const struct ftl_io *io, size_t offset);
+void ftl_io_advance(struct ftl_io *io, size_t num_blocks);
+size_t ftl_iovec_num_blocks(struct iovec *iov, size_t iov_cnt);
+void *ftl_io_iovec_addr(struct ftl_io *io);
+size_t ftl_io_iovec_len_left(struct ftl_io *io);
+struct ftl_io *ftl_io_wbuf_init(struct spdk_ftl_dev *dev, struct ftl_addr addr,
+ struct ftl_band *band, struct ftl_batch *batch, ftl_io_fn cb);
+struct ftl_io *ftl_io_erase_init(struct ftl_band *band, size_t num_blocks, ftl_io_fn cb);
+struct ftl_io *ftl_io_user_init(struct spdk_io_channel *ioch, uint64_t lba, size_t num_blocks,
+ struct iovec *iov, size_t iov_cnt, spdk_ftl_fn cb_fn,
+ void *cb_arg, int type);
+void *ftl_io_get_md(const struct ftl_io *io);
+void ftl_io_complete(struct ftl_io *io);
+void ftl_io_shrink_iovec(struct ftl_io *io, size_t num_blocks);
+void ftl_io_process_error(struct ftl_io *io, const struct spdk_nvme_cpl *status);
+void ftl_io_reset(struct ftl_io *io);
+void ftl_io_call_foreach_child(struct ftl_io *io, int (*callback)(struct ftl_io *));
+
+#endif /* FTL_IO_H */
diff --git a/src/spdk/lib/ftl/ftl_reloc.c b/src/spdk/lib/ftl/ftl_reloc.c
new file mode 100644
index 000000000..e59bf4d81
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_reloc.c
@@ -0,0 +1,860 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/likely.h"
+#include "spdk_internal/log.h"
+#include "spdk/ftl.h"
+
+#include "ftl_reloc.h"
+#include "ftl_core.h"
+#include "ftl_io.h"
+#include "ftl_band.h"
+#include "ftl_debug.h"
+
+/* Maximum active reloc moves */
+#define FTL_RELOC_MAX_MOVES 256
+
+struct ftl_reloc;
+struct ftl_band_reloc;
+
+enum ftl_reloc_move_state {
+ FTL_RELOC_STATE_READ_LBA_MAP,
+ FTL_RELOC_STATE_READ,
+ FTL_RELOC_STATE_WRITE,
+};
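+
+/*
+ * A move cycles through the states above: READ picks a group of valid blocks and reads
+ * their data, READ_LBA_MAP fetches the LBA map fragment needed to translate the physical
+ * addresses back to LBAs, and WRITE re-submits the data through the write path. After a
+ * successful write the move is reset back to READ and reused.
+ */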
+
+enum ftl_band_reloc_state {
+ FTL_BAND_RELOC_STATE_INACTIVE,
+ FTL_BAND_RELOC_STATE_PENDING,
+ FTL_BAND_RELOC_STATE_ACTIVE,
+ FTL_BAND_RELOC_STATE_HIGH_PRIO
+};
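+
+/*
+ * Band relocation states: a band starts INACTIVE, becomes PENDING when ftl_reloc_add()
+ * marks blocks for relocation, turns ACTIVE once picked up by ftl_reloc() (only after the
+ * band is closed) and goes back to INACTIVE, or to PENDING if new blocks were marked in the
+ * meantime, in ftl_reloc_release(). HIGH_PRIO bands are relocated in full and are processed
+ * ahead of everything else.
+ */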
+
+struct ftl_reloc_move {
+ struct ftl_band_reloc *breloc;
+
+ /* Start addr */
+ struct ftl_addr addr;
+
+ /* Number of logical blocks */
+ size_t num_blocks;
+
+ /* Data buffer */
+ void *data;
+
+ /* Move state (read lba_map, read, write) */
+ enum ftl_reloc_move_state state;
+
+ /* IO associated with move */
+ struct ftl_io *io;
+
+ STAILQ_ENTRY(ftl_reloc_move) entry;
+};
+
+struct ftl_band_reloc {
+ struct ftl_reloc *parent;
+
+ /* Band being relocated */
+ struct ftl_band *band;
+
+ /* Number of logical blocks to be relocated */
+ size_t num_blocks;
+
+ /* Bitmap of logical blocks to be relocated */
+ struct spdk_bit_array *reloc_map;
+
+ /* State of the band reloc */
+ enum ftl_band_reloc_state state;
+
+ /* The band is being defragged */
+ bool defrag;
+
+ /* Reloc map iterator */
+ struct {
+ /* Array of zone offsets */
+ size_t *zone_offset;
+
+ /* Current zone */
+ size_t zone_current;
+ } iter;
+
+ /* Number of outstanding moves */
+ size_t num_outstanding;
+
+ /* Pool of move objects */
+ struct ftl_reloc_move *moves;
+
+ /* Move queue */
+ STAILQ_HEAD(, ftl_reloc_move) move_queue;
+
+ TAILQ_ENTRY(ftl_band_reloc) entry;
+};
+
+struct ftl_reloc {
+ /* Device associated with relocate */
+ struct spdk_ftl_dev *dev;
+
+ /* Indicates relocate is about to halt */
+ bool halt;
+
+ /* Maximum number of IOs per band */
+ size_t max_qdepth;
+
+ /* Maximum number of active band relocates */
+ size_t max_active;
+
+ /* Maximum transfer size (in logical blocks) per single IO */
+ size_t xfer_size;
+ /* Number of bands being defragged */
+ size_t num_defrag_bands;
+
+ /* Array of band relocates */
+ struct ftl_band_reloc *brelocs;
+
+ /* Number of active/priority band relocates */
+ size_t num_active;
+
+ /* Priority band relocates queue */
+ TAILQ_HEAD(, ftl_band_reloc) prio_queue;
+
+ /* Active band relocates queue */
+ TAILQ_HEAD(, ftl_band_reloc) active_queue;
+
+ /* Pending band relocates queue */
+ TAILQ_HEAD(, ftl_band_reloc) pending_queue;
+};
+
+bool
+ftl_reloc_is_defrag_active(const struct ftl_reloc *reloc)
+{
+ return reloc->num_defrag_bands > 0;
+}
+
+static size_t
+ftl_reloc_iter_zone_offset(struct ftl_band_reloc *breloc)
+{
+ size_t zone = breloc->iter.zone_current;
+
+ return breloc->iter.zone_offset[zone];
+}
+
+static size_t
+ftl_reloc_iter_zone_done(struct ftl_band_reloc *breloc)
+{
+ size_t num_blocks = ftl_get_num_blocks_in_zone(breloc->parent->dev);
+
+ return ftl_reloc_iter_zone_offset(breloc) == num_blocks;
+}
+
+static void
+ftl_reloc_clr_block(struct ftl_band_reloc *breloc, size_t block_off)
+{
+ if (!spdk_bit_array_get(breloc->reloc_map, block_off)) {
+ return;
+ }
+
+ spdk_bit_array_clear(breloc->reloc_map, block_off);
+ assert(breloc->num_blocks);
+ breloc->num_blocks--;
+}
+
+static void
+ftl_reloc_read_lba_map_cb(struct ftl_io *io, void *arg, int status)
+{
+ struct ftl_reloc_move *move = arg;
+ struct ftl_band_reloc *breloc = move->breloc;
+
+ breloc->num_outstanding--;
+ assert(status == 0);
+ move->state = FTL_RELOC_STATE_WRITE;
+ STAILQ_INSERT_TAIL(&breloc->move_queue, move, entry);
+}
+
+static int
+ftl_reloc_read_lba_map(struct ftl_band_reloc *breloc, struct ftl_reloc_move *move)
+{
+ struct ftl_band *band = breloc->band;
+
+ breloc->num_outstanding++;
+ return ftl_band_read_lba_map(band, ftl_band_block_offset_from_addr(band, move->addr),
+ move->num_blocks, ftl_reloc_read_lba_map_cb, move);
+}
+
+static void
+ftl_reloc_prep(struct ftl_band_reloc *breloc)
+{
+ struct ftl_band *band = breloc->band;
+ struct ftl_reloc *reloc = breloc->parent;
+ struct ftl_reloc_move *move;
+ size_t i;
+
+ reloc->num_active++;
+
+ if (!band->high_prio) {
+ if (ftl_band_alloc_lba_map(band)) {
+ SPDK_ERRLOG("Failed to allocate lba map\n");
+ assert(false);
+ }
+ } else {
+ ftl_band_acquire_lba_map(band);
+ }
+
+ for (i = 0; i < reloc->max_qdepth; ++i) {
+ move = &breloc->moves[i];
+ move->state = FTL_RELOC_STATE_READ;
+ STAILQ_INSERT_TAIL(&breloc->move_queue, move, entry);
+ }
+}
+
+static void
+ftl_reloc_free_move(struct ftl_band_reloc *breloc, struct ftl_reloc_move *move)
+{
+ assert(move);
+ spdk_dma_free(move->data);
+ memset(move, 0, sizeof(*move));
+ move->state = FTL_RELOC_STATE_READ;
+}
+
+static void
+ftl_reloc_write_cb(struct ftl_io *io, void *arg, int status)
+{
+ struct ftl_reloc_move *move = arg;
+ struct ftl_addr addr = move->addr;
+ struct ftl_band_reloc *breloc = move->breloc;
+ size_t i;
+
+ breloc->num_outstanding--;
+
+ if (status) {
+ SPDK_ERRLOG("Reloc write failed with status: %d\n", status);
+ assert(false);
+ return;
+ }
+
+ for (i = 0; i < move->num_blocks; ++i) {
+ addr.offset = move->addr.offset + i;
+ size_t block_off = ftl_band_block_offset_from_addr(breloc->band, addr);
+ ftl_reloc_clr_block(breloc, block_off);
+ }
+
+ ftl_reloc_free_move(breloc, move);
+ STAILQ_INSERT_TAIL(&breloc->move_queue, move, entry);
+}
+
+static void
+ftl_reloc_read_cb(struct ftl_io *io, void *arg, int status)
+{
+ struct ftl_reloc_move *move = arg;
+ struct ftl_band_reloc *breloc = move->breloc;
+
+ breloc->num_outstanding--;
+
+	/* TODO: We should handle failures on relocation reads. We need to inform the */
+	/* user that this group of blocks is bad (update the L2P with a bad block address */
+	/* and put it in the lba_map/sector_lba). Maybe we could also retry the read with smaller granularity? */
+ if (status) {
+ SPDK_ERRLOG("Reloc read failed with status: %d\n", status);
+ assert(false);
+ return;
+ }
+
+ move->state = FTL_RELOC_STATE_READ_LBA_MAP;
+ move->io = NULL;
+ STAILQ_INSERT_TAIL(&breloc->move_queue, move, entry);
+}
+
+static void
+ftl_reloc_iter_reset(struct ftl_band_reloc *breloc)
+{
+ memset(breloc->iter.zone_offset, 0, ftl_get_num_punits(breloc->band->dev) *
+ sizeof(*breloc->iter.zone_offset));
+ breloc->iter.zone_current = 0;
+}
+
+static size_t
+ftl_reloc_iter_block_offset(struct ftl_band_reloc *breloc)
+{
+ size_t zone_offset = breloc->iter.zone_current * ftl_get_num_blocks_in_zone(breloc->parent->dev);
+
+ return breloc->iter.zone_offset[breloc->iter.zone_current] + zone_offset;
+}
+
+static void
+ftl_reloc_iter_next_zone(struct ftl_band_reloc *breloc)
+{
+ size_t num_zones = ftl_get_num_punits(breloc->band->dev);
+
+ breloc->iter.zone_current = (breloc->iter.zone_current + 1) % num_zones;
+}
+
+static int
+ftl_reloc_block_valid(struct ftl_band_reloc *breloc, size_t block_off)
+{
+ struct ftl_addr addr = ftl_band_addr_from_block_offset(breloc->band, block_off);
+
+ return ftl_addr_is_written(breloc->band, addr) &&
+ spdk_bit_array_get(breloc->reloc_map, block_off) &&
+ ftl_band_block_offset_valid(breloc->band, block_off);
+}
+
+static int
+ftl_reloc_iter_next(struct ftl_band_reloc *breloc, size_t *block_off)
+{
+ size_t zone = breloc->iter.zone_current;
+
+ *block_off = ftl_reloc_iter_block_offset(breloc);
+
+ if (ftl_reloc_iter_zone_done(breloc)) {
+ return 0;
+ }
+
+ breloc->iter.zone_offset[zone]++;
+
+ if (!ftl_reloc_block_valid(breloc, *block_off)) {
+ ftl_reloc_clr_block(breloc, *block_off);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int
+ftl_reloc_first_valid_block(struct ftl_band_reloc *breloc, size_t *block_off)
+{
+ size_t i, num_blocks = ftl_get_num_blocks_in_zone(breloc->parent->dev);
+
+ for (i = ftl_reloc_iter_zone_offset(breloc); i < num_blocks; ++i) {
+ if (ftl_reloc_iter_next(breloc, block_off)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+ftl_reloc_iter_done(struct ftl_band_reloc *breloc)
+{
+ size_t i;
+ size_t num_zones = ftl_get_num_punits(breloc->band->dev);
+ size_t num_blocks = ftl_get_num_blocks_in_zone(breloc->parent->dev);
+
+ for (i = 0; i < num_zones; ++i) {
+ if (breloc->iter.zone_offset[i] != num_blocks) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static size_t
+ftl_reloc_find_valid_blocks(struct ftl_band_reloc *breloc,
+ size_t _num_blocks, struct ftl_addr *addr)
+{
+ size_t block_off, num_blocks = 0;
+
+ if (!ftl_reloc_first_valid_block(breloc, &block_off)) {
+ return 0;
+ }
+
+ *addr = ftl_band_addr_from_block_offset(breloc->band, block_off);
+
+ for (num_blocks = 1; num_blocks < _num_blocks; num_blocks++) {
+ if (!ftl_reloc_iter_next(breloc, &block_off)) {
+ break;
+ }
+ }
+
+ return num_blocks;
+}
+
+static size_t
+ftl_reloc_next_blocks(struct ftl_band_reloc *breloc, struct ftl_addr *addr)
+{
+ size_t i, num_blocks = 0;
+ struct spdk_ftl_dev *dev = breloc->parent->dev;
+
+ for (i = 0; i < ftl_get_num_punits(dev); ++i) {
+ num_blocks = ftl_reloc_find_valid_blocks(breloc, breloc->parent->xfer_size, addr);
+ ftl_reloc_iter_next_zone(breloc);
+
+ if (num_blocks || ftl_reloc_iter_done(breloc)) {
+ break;
+ }
+ }
+
+ return num_blocks;
+}
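+
+/*
+ * Note: valid blocks are collected one zone at a time, up to xfer_size blocks per move, and
+ * the zone iterator advances round-robin across the parallel units so consecutive moves
+ * spread their reads over the whole band.
+ */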
+
+static struct ftl_io *
+ftl_reloc_io_init(struct ftl_band_reloc *breloc, struct ftl_reloc_move *move,
+ ftl_io_fn fn, enum ftl_io_type io_type, int flags)
+{
+ size_t block_off, i;
+ struct ftl_addr addr = move->addr;
+ struct ftl_io *io = NULL;
+ struct ftl_io_init_opts opts = {
+ .dev = breloc->parent->dev,
+ .band = breloc->band,
+ .size = sizeof(*io),
+ .flags = flags | FTL_IO_INTERNAL | FTL_IO_PHYSICAL_MODE,
+ .type = io_type,
+ .num_blocks = move->num_blocks,
+ .iovs = {
+ {
+ .iov_base = move->data,
+ .iov_len = move->num_blocks * FTL_BLOCK_SIZE,
+ }
+ },
+ .iovcnt = 1,
+ .cb_fn = fn,
+ };
+
+ io = ftl_io_init_internal(&opts);
+ if (!io) {
+ return NULL;
+ }
+
+ io->cb_ctx = move;
+ io->addr = move->addr;
+
+ if (flags & FTL_IO_VECTOR_LBA) {
+ for (i = 0; i < io->num_blocks; ++i, ++addr.offset) {
+ block_off = ftl_band_block_offset_from_addr(breloc->band, addr);
+
+ if (!ftl_band_block_offset_valid(breloc->band, block_off)) {
+ io->lba.vector[i] = FTL_LBA_INVALID;
+ continue;
+ }
+
+ io->lba.vector[i] = breloc->band->lba_map.map[block_off];
+ }
+ }
+
+ ftl_trace_lba_io_init(io->dev, io);
+
+ return io;
+}
+
+static int
+ftl_reloc_write(struct ftl_band_reloc *breloc, struct ftl_reloc_move *move)
+{
+ int io_flags = FTL_IO_WEAK | FTL_IO_VECTOR_LBA | FTL_IO_BYPASS_CACHE;
+
+ if (spdk_likely(!move->io)) {
+ move->io = ftl_reloc_io_init(breloc, move, ftl_reloc_write_cb,
+ FTL_IO_WRITE, io_flags);
+ if (!move->io) {
+ ftl_reloc_free_move(breloc, move);
+ STAILQ_INSERT_TAIL(&breloc->move_queue, move, entry);
+ return -ENOMEM;
+ }
+ }
+
+ breloc->num_outstanding++;
+ ftl_io_write(move->io);
+ return 0;
+}
+
+static int
+ftl_reloc_read(struct ftl_band_reloc *breloc, struct ftl_reloc_move *move)
+{
+ struct ftl_addr addr = {};
+
+ move->num_blocks = ftl_reloc_next_blocks(breloc, &addr);
+ move->breloc = breloc;
+ move->addr = addr;
+
+ if (!move->num_blocks) {
+ return 0;
+ }
+
+ move->data = spdk_dma_malloc(FTL_BLOCK_SIZE * move->num_blocks, 4096, NULL);
+ if (!move->data) {
+ return -1;
+ }
+
+ move->io = ftl_reloc_io_init(breloc, move, ftl_reloc_read_cb, FTL_IO_READ, 0);
+ if (!move->io) {
+ ftl_reloc_free_move(breloc, move);
+ STAILQ_INSERT_TAIL(&breloc->move_queue, move, entry);
+		SPDK_ERRLOG("Failed to initialize IO for relocation\n");
+ return -1;
+ }
+
+ breloc->num_outstanding++;
+ ftl_io_read(move->io);
+ return 0;
+}
+
+static void
+ftl_reloc_process_moves(struct ftl_band_reloc *breloc)
+{
+ struct ftl_reloc_move *move;
+ STAILQ_HEAD(, ftl_reloc_move) move_queue;
+ int rc = 0;
+
+	/*
+	 * When IO allocation fails, we do not want to retry immediately, so keep the moves
+	 * on a temporary queue
+	 */
+ STAILQ_INIT(&move_queue);
+ STAILQ_SWAP(&breloc->move_queue, &move_queue, ftl_reloc_move);
+
+ while (!STAILQ_EMPTY(&move_queue)) {
+ move = STAILQ_FIRST(&move_queue);
+ STAILQ_REMOVE_HEAD(&move_queue, entry);
+
+ switch (move->state) {
+ case FTL_RELOC_STATE_READ_LBA_MAP:
+ rc = ftl_reloc_read_lba_map(breloc, move);
+ break;
+ case FTL_RELOC_STATE_READ:
+ rc = ftl_reloc_read(breloc, move);
+ break;
+ case FTL_RELOC_STATE_WRITE:
+ rc = ftl_reloc_write(breloc, move);
+ break;
+ default:
+ assert(false);
+ break;
+ }
+
+ if (rc) {
+ SPDK_ERRLOG("Move queue processing failed\n");
+ assert(false);
+ }
+ }
+}
+
+static bool
+ftl_reloc_done(struct ftl_band_reloc *breloc)
+{
+ return !breloc->num_outstanding && STAILQ_EMPTY(&breloc->move_queue);
+}
+
+static void
+ftl_reloc_release(struct ftl_band_reloc *breloc)
+{
+ struct ftl_reloc *reloc = breloc->parent;
+ struct ftl_band *band = breloc->band;
+
+ ftl_reloc_iter_reset(breloc);
+ ftl_band_release_lba_map(band);
+ reloc->num_active--;
+
+ if (breloc->state == FTL_BAND_RELOC_STATE_HIGH_PRIO) {
+ /* High prio band must be relocated as a whole and ANM events will be ignored */
+ assert(breloc->num_blocks == 0 && ftl_band_empty(band));
+ TAILQ_REMOVE(&reloc->prio_queue, breloc, entry);
+ band->high_prio = 0;
+ breloc->state = FTL_BAND_RELOC_STATE_INACTIVE;
+ } else {
+ assert(breloc->state == FTL_BAND_RELOC_STATE_ACTIVE);
+ TAILQ_REMOVE(&reloc->active_queue, breloc, entry);
+ breloc->state = FTL_BAND_RELOC_STATE_INACTIVE;
+
+		/* If we got an ANM event during relocation, put the band back on the pending queue */
+ if (breloc->num_blocks != 0) {
+ breloc->state = FTL_BAND_RELOC_STATE_PENDING;
+ TAILQ_INSERT_TAIL(&reloc->pending_queue, breloc, entry);
+ return;
+ }
+ }
+
+ if (ftl_band_empty(band) && band->state == FTL_BAND_STATE_CLOSED) {
+ ftl_band_set_state(breloc->band, FTL_BAND_STATE_FREE);
+
+ if (breloc->defrag) {
+ breloc->defrag = false;
+ assert(reloc->num_defrag_bands > 0);
+ reloc->num_defrag_bands--;
+ }
+ }
+}
+
+static void
+ftl_process_reloc(struct ftl_band_reloc *breloc)
+{
+ ftl_reloc_process_moves(breloc);
+
+ if (ftl_reloc_done(breloc)) {
+ ftl_reloc_release(breloc);
+ }
+}
+
+static int
+ftl_band_reloc_init(struct ftl_reloc *reloc, struct ftl_band_reloc *breloc,
+ struct ftl_band *band)
+{
+ breloc->band = band;
+ breloc->parent = reloc;
+
+ breloc->reloc_map = spdk_bit_array_create(ftl_get_num_blocks_in_band(reloc->dev));
+ if (!breloc->reloc_map) {
+		SPDK_ERRLOG("Failed to initialize reloc map\n");
+ return -1;
+ }
+
+ breloc->iter.zone_offset = calloc(ftl_get_num_punits(band->dev),
+ sizeof(*breloc->iter.zone_offset));
+ if (!breloc->iter.zone_offset) {
+		SPDK_ERRLOG("Failed to initialize reloc iterator\n");
+ return -1;
+ }
+
+ STAILQ_INIT(&breloc->move_queue);
+
+ breloc->moves = calloc(reloc->max_qdepth, sizeof(*breloc->moves));
+ if (!breloc->moves) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+ftl_band_reloc_free(struct ftl_band_reloc *breloc)
+{
+ struct ftl_reloc_move *move;
+
+ if (!breloc) {
+ return;
+ }
+
+ assert(breloc->num_outstanding == 0);
+
+ /* Drain write queue if there is active band relocation during shutdown */
+ if (breloc->state == FTL_BAND_RELOC_STATE_ACTIVE ||
+ breloc->state == FTL_BAND_RELOC_STATE_HIGH_PRIO) {
+ assert(breloc->parent->halt);
+ STAILQ_FOREACH(move, &breloc->move_queue, entry) {
+ ftl_reloc_free_move(breloc, move);
+ }
+ }
+
+ spdk_bit_array_free(&breloc->reloc_map);
+ free(breloc->iter.zone_offset);
+ free(breloc->moves);
+}
+
+struct ftl_reloc *
+ftl_reloc_init(struct spdk_ftl_dev *dev)
+{
+ struct ftl_reloc *reloc;
+ size_t i;
+
+ reloc = calloc(1, sizeof(*reloc));
+ if (!reloc) {
+ return NULL;
+ }
+
+ reloc->dev = dev;
+ reloc->halt = true;
+ reloc->max_qdepth = dev->conf.max_reloc_qdepth;
+ reloc->max_active = dev->conf.max_active_relocs;
+ reloc->xfer_size = dev->xfer_size;
+ reloc->num_defrag_bands = 0;
+
+ if (reloc->max_qdepth > FTL_RELOC_MAX_MOVES) {
+ goto error;
+ }
+
+ reloc->brelocs = calloc(ftl_get_num_bands(dev), sizeof(*reloc->brelocs));
+ if (!reloc->brelocs) {
+ goto error;
+ }
+
+ for (i = 0; i < ftl_get_num_bands(reloc->dev); ++i) {
+ if (ftl_band_reloc_init(reloc, &reloc->brelocs[i], &dev->bands[i])) {
+ goto error;
+ }
+ }
+
+ TAILQ_INIT(&reloc->pending_queue);
+ TAILQ_INIT(&reloc->active_queue);
+ TAILQ_INIT(&reloc->prio_queue);
+
+ return reloc;
+error:
+ ftl_reloc_free(reloc);
+ return NULL;
+}
+
+void
+ftl_reloc_free(struct ftl_reloc *reloc)
+{
+ size_t i;
+
+ if (!reloc) {
+ return;
+ }
+
+ for (i = 0; i < ftl_get_num_bands(reloc->dev); ++i) {
+ ftl_band_reloc_free(&reloc->brelocs[i]);
+ }
+
+ free(reloc->brelocs);
+ free(reloc);
+}
+
+bool
+ftl_reloc_is_halted(const struct ftl_reloc *reloc)
+{
+ return reloc->halt;
+}
+
+void
+ftl_reloc_halt(struct ftl_reloc *reloc)
+{
+ reloc->halt = true;
+}
+
+void
+ftl_reloc_resume(struct ftl_reloc *reloc)
+{
+ reloc->halt = false;
+}
+
+void
+ftl_reloc(struct ftl_reloc *reloc)
+{
+ struct ftl_band_reloc *breloc, *tbreloc;
+
+ if (ftl_reloc_is_halted(reloc)) {
+ return;
+ }
+
+ /* Process first band from priority queue and return */
+ breloc = TAILQ_FIRST(&reloc->prio_queue);
+ if (breloc) {
+ ftl_process_reloc(breloc);
+ return;
+ }
+
+ TAILQ_FOREACH_SAFE(breloc, &reloc->pending_queue, entry, tbreloc) {
+ if (reloc->num_active == reloc->max_active) {
+ break;
+ }
+
+ /* Wait for band to close before relocating */
+ if (breloc->band->state != FTL_BAND_STATE_CLOSED) {
+ continue;
+ }
+
+ ftl_reloc_prep(breloc);
+ assert(breloc->state == FTL_BAND_RELOC_STATE_PENDING);
+ TAILQ_REMOVE(&reloc->pending_queue, breloc, entry);
+ breloc->state = FTL_BAND_RELOC_STATE_ACTIVE;
+ TAILQ_INSERT_HEAD(&reloc->active_queue, breloc, entry);
+ }
+
+ TAILQ_FOREACH_SAFE(breloc, &reloc->active_queue, entry, tbreloc) {
+ assert(breloc->state == FTL_BAND_RELOC_STATE_ACTIVE);
+ ftl_process_reloc(breloc);
+ }
+}
+
+void
+ftl_reloc_add(struct ftl_reloc *reloc, struct ftl_band *band, size_t offset,
+ size_t num_blocks, int prio, bool is_defrag)
+{
+ struct ftl_band_reloc *breloc = &reloc->brelocs[band->id];
+ size_t i;
+
+ /* No need to add anything if already at high prio - whole band should be relocated */
+ if (!prio && band->high_prio) {
+ return;
+ }
+
+ pthread_spin_lock(&band->lba_map.lock);
+ if (band->lba_map.num_vld == 0) {
+ pthread_spin_unlock(&band->lba_map.lock);
+
+ /* If the band is closed and has no valid blocks, free it */
+ if (band->state == FTL_BAND_STATE_CLOSED) {
+ ftl_band_set_state(band, FTL_BAND_STATE_FREE);
+ }
+
+ return;
+ }
+ pthread_spin_unlock(&band->lba_map.lock);
+
+ for (i = offset; i < offset + num_blocks; ++i) {
+ if (spdk_bit_array_get(breloc->reloc_map, i)) {
+ continue;
+ }
+ spdk_bit_array_set(breloc->reloc_map, i);
+ breloc->num_blocks++;
+ }
+
+ /* If the band is coming from the defrag process, mark it appropriately */
+ if (is_defrag) {
+ assert(offset == 0 && num_blocks == ftl_get_num_blocks_in_band(band->dev));
+ reloc->num_defrag_bands++;
+ breloc->defrag = true;
+ }
+
+ if (!prio) {
+ if (breloc->state == FTL_BAND_RELOC_STATE_INACTIVE) {
+ breloc->state = FTL_BAND_RELOC_STATE_PENDING;
+ TAILQ_INSERT_HEAD(&reloc->pending_queue, breloc, entry);
+ }
+ } else {
+ bool active = false;
+ /* If priority band is already on pending or active queue, remove it from it */
+ switch (breloc->state) {
+ case FTL_BAND_RELOC_STATE_PENDING:
+ TAILQ_REMOVE(&reloc->pending_queue, breloc, entry);
+ break;
+ case FTL_BAND_RELOC_STATE_ACTIVE:
+ active = true;
+ TAILQ_REMOVE(&reloc->active_queue, breloc, entry);
+ break;
+ default:
+ break;
+ }
+
+ breloc->state = FTL_BAND_RELOC_STATE_HIGH_PRIO;
+ TAILQ_INSERT_TAIL(&reloc->prio_queue, breloc, entry);
+
+		/*
+		 * If the band was already on the active queue, it doesn't need any
+		 * additional resources
+		 */
+ if (!active) {
+ ftl_reloc_prep(breloc);
+ }
+ }
+}
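+
+/*
+ * Example (sketch): the defrag path is expected to request relocation of a whole band at
+ * normal priority, e.g.:
+ *
+ *	ftl_reloc_add(dev->reloc, band, 0, ftl_get_num_blocks_in_band(dev), 0, true);
+ *
+ * while recovery paths may pass prio != 0 to push the band onto the high priority queue.
+ */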
diff --git a/src/spdk/lib/ftl/ftl_reloc.h b/src/spdk/lib/ftl/ftl_reloc.h
new file mode 100644
index 000000000..21f49a47d
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_reloc.h
@@ -0,0 +1,53 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FTL_RELOC_H
+#define FTL_RELOC_H
+
+#include "spdk/stdinc.h"
+#include "spdk/ftl.h"
+
+struct ftl_reloc;
+struct ftl_band;
+
+struct ftl_reloc *ftl_reloc_init(struct spdk_ftl_dev *dev);
+void ftl_reloc_free(struct ftl_reloc *reloc);
+void ftl_reloc_add(struct ftl_reloc *reloc, struct ftl_band *band,
+ size_t offset, size_t num_blocks, int prio, bool is_defrag);
+void ftl_reloc(struct ftl_reloc *reloc);
+void ftl_reloc_halt(struct ftl_reloc *reloc);
+void ftl_reloc_resume(struct ftl_reloc *reloc);
+bool ftl_reloc_is_halted(const struct ftl_reloc *reloc);
+bool ftl_reloc_is_defrag_active(const struct ftl_reloc *reloc);
+
+#endif /* FTL_RELOC_H */
diff --git a/src/spdk/lib/ftl/ftl_restore.c b/src/spdk/lib/ftl/ftl_restore.c
new file mode 100644
index 000000000..6f626645d
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_restore.c
@@ -0,0 +1,1350 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/stdinc.h"
+#include "spdk/ftl.h"
+#include "spdk/util.h"
+#include "spdk/likely.h"
+#include "spdk/string.h"
+#include "spdk/crc32.h"
+
+#include "ftl_core.h"
+#include "ftl_band.h"
+#include "ftl_io.h"
+
+struct ftl_restore_band {
+ struct ftl_restore *parent;
+ /* Associated band */
+ struct ftl_band *band;
+ /* Status of retrieving this band's metadata */
+ enum ftl_md_status md_status;
+ /* Padded queue link */
+ STAILQ_ENTRY(ftl_restore_band) stailq;
+};
+
+struct ftl_nv_cache_restore;
+
+/* Describes single phase to be restored from non-volatile cache */
+struct ftl_nv_cache_range {
+ struct ftl_nv_cache_restore *parent;
+ /* Start offset */
+ uint64_t start_addr;
+ /* Last block's address */
+ uint64_t last_addr;
+ /*
+ * Number of blocks (can be smaller than the difference between the last
+ * and the starting block due to range overlap)
+ */
+ uint64_t num_blocks;
+ /* Number of blocks already recovered */
+ uint64_t num_recovered;
+ /* Current address during recovery */
+ uint64_t current_addr;
+ /* Phase of the range */
+ unsigned int phase;
+ /* Indicates whether the data from this range needs to be recovered */
+ bool recovery;
+};
+
+struct ftl_nv_cache_block {
+ struct ftl_nv_cache_restore *parent;
+ /* Data buffer */
+ void *buf;
+ /* Metadata buffer */
+ void *md_buf;
+ /* Block offset within the cache */
+ uint64_t offset;
+};
+
+struct ftl_nv_cache_restore {
+ struct ftl_nv_cache *nv_cache;
+ /* IO channel to use */
+ struct spdk_io_channel *ioch;
+ /*
+ * Non-volatile cache ranges. The ranges can overlap, as we have no
+ * control over the order of completions. The phase of the range is the
+ * index within the table. The range with index 0 marks blocks that were
+ * never written.
+ */
+ struct ftl_nv_cache_range range[FTL_NV_CACHE_PHASE_COUNT];
+#define FTL_NV_CACHE_RESTORE_DEPTH 128
+ /* Non-volatile cache buffers */
+ struct ftl_nv_cache_block block[FTL_NV_CACHE_RESTORE_DEPTH];
+ /* Current address */
+ uint64_t current_addr;
+ /* Number of outstanding requests */
+ size_t num_outstanding;
+ /* Recovery/scan status */
+ int status;
+ /* Current phase of the recovery */
+ unsigned int phase;
+};
+
+struct ftl_restore {
+ struct spdk_ftl_dev *dev;
+ /* Completion callback (called for each phase of the restoration) */
+ ftl_restore_fn cb;
+ /* Completion callback context */
+ void *cb_arg;
+ /* Number of inflight IOs */
+ unsigned int num_ios;
+ /* Current band number (index in the below bands array) */
+ unsigned int current;
+ /* Array of bands */
+ struct ftl_restore_band *bands;
+ /* Queue of bands to be padded (due to unsafe shutdown) */
+ STAILQ_HEAD(, ftl_restore_band) pad_bands;
+ /* Status of the padding */
+ int pad_status;
+ /* Metadata buffer */
+ void *md_buf;
+ /* LBA map buffer */
+ void *lba_map;
+ /* Indicates we're in the final phase of the restoration */
+ bool final_phase;
+ /* Non-volatile cache recovery */
+ struct ftl_nv_cache_restore nv_cache;
+};
+
+static int
+ftl_restore_tail_md(struct ftl_restore_band *rband);
+static void
+ftl_pad_zone_cb(struct ftl_io *io, void *arg, int status);
+static void
+ftl_restore_pad_band(struct ftl_restore_band *rband);
+
+static void
+ftl_restore_free(struct ftl_restore *restore)
+{
+ unsigned int i;
+
+ if (!restore) {
+ return;
+ }
+
+ for (i = 0; i < FTL_NV_CACHE_RESTORE_DEPTH; ++i) {
+ spdk_dma_free(restore->nv_cache.block[i].buf);
+ }
+
+ spdk_dma_free(restore->md_buf);
+ free(restore->bands);
+ free(restore);
+}
+
+static struct ftl_restore *
+ftl_restore_init(struct spdk_ftl_dev *dev, ftl_restore_fn cb, void *cb_arg)
+{
+ struct ftl_restore *restore;
+ struct ftl_restore_band *rband;
+ size_t i;
+
+ restore = calloc(1, sizeof(*restore));
+ if (!restore) {
+ goto error;
+ }
+
+ restore->dev = dev;
+ restore->cb = cb;
+ restore->cb_arg = cb_arg;
+ restore->final_phase = false;
+
+ restore->bands = calloc(ftl_get_num_bands(dev), sizeof(*restore->bands));
+ if (!restore->bands) {
+ goto error;
+ }
+
+ STAILQ_INIT(&restore->pad_bands);
+
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ rband = &restore->bands[i];
+ rband->band = &dev->bands[i];
+ rband->parent = restore;
+ rband->md_status = FTL_MD_NO_MD;
+ }
+
+ /* Allocate buffer capable of holding head mds of all bands */
+ restore->md_buf = spdk_dma_zmalloc(ftl_get_num_bands(dev) * ftl_head_md_num_blocks(dev) *
+ FTL_BLOCK_SIZE, 0, NULL);
+ if (!restore->md_buf) {
+ goto error;
+ }
+
+ return restore;
+error:
+ ftl_restore_free(restore);
+ return NULL;
+}
+
+static void
+ftl_restore_complete(struct ftl_restore *restore, int status)
+{
+ struct ftl_restore *ctx = status ? NULL : restore;
+ bool final_phase = restore->final_phase;
+
+ restore->cb(ctx, status, restore->cb_arg);
+ if (status || final_phase) {
+ ftl_restore_free(restore);
+ }
+}
+
+static int
+ftl_band_cmp(const void *lband, const void *rband)
+{
+ uint64_t lseq = ((struct ftl_restore_band *)lband)->band->seq;
+ uint64_t rseq = ((struct ftl_restore_band *)rband)->band->seq;
+
+ if (lseq < rseq) {
+ return -1;
+ } else {
+ return 1;
+ }
+}
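+
+/* Used by qsort() below to order restored bands by ascending sequence number; equal sequence
+ * numbers are not resolved here and are detected separately by ftl_restore_check_seq().
+ */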
+
+static int
+ftl_restore_check_seq(const struct ftl_restore *restore)
+{
+ const struct spdk_ftl_dev *dev = restore->dev;
+ const struct ftl_restore_band *rband;
+ const struct ftl_band *next_band;
+ size_t i;
+
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ rband = &restore->bands[i];
+ if (rband->md_status != FTL_MD_SUCCESS) {
+ continue;
+ }
+
+ next_band = LIST_NEXT(rband->band, list_entry);
+ if (next_band && rband->band->seq == next_band->seq) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static bool
+ftl_restore_head_valid(struct spdk_ftl_dev *dev, struct ftl_restore *restore, size_t *num_valid)
+{
+ struct ftl_restore_band *rband;
+ size_t i;
+
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ rband = &restore->bands[i];
+
+ if (rband->md_status != FTL_MD_SUCCESS &&
+ rband->md_status != FTL_MD_NO_MD &&
+ rband->md_status != FTL_MD_IO_FAILURE) {
+ SPDK_ERRLOG("Inconsistent head metadata found on band %u\n",
+ rband->band->id);
+ return false;
+ }
+
+ if (rband->md_status == FTL_MD_SUCCESS) {
+ (*num_valid)++;
+ }
+ }
+
+ return true;
+}
+
+static void
+ftl_restore_head_complete(struct ftl_restore *restore)
+{
+ struct spdk_ftl_dev *dev = restore->dev;
+ size_t num_valid = 0;
+ int status = -EIO;
+
+ if (!ftl_restore_head_valid(dev, restore, &num_valid)) {
+ goto out;
+ }
+
+ if (num_valid == 0) {
+ SPDK_ERRLOG("Couldn't find any valid bands\n");
+ goto out;
+ }
+
+ /* Sort bands in sequence number ascending order */
+ qsort(restore->bands, ftl_get_num_bands(dev), sizeof(struct ftl_restore_band),
+ ftl_band_cmp);
+
+ if (ftl_restore_check_seq(restore)) {
+ SPDK_ERRLOG("Band sequence consistency failed\n");
+ goto out;
+ }
+
+ dev->num_lbas = dev->global_md.num_lbas;
+ status = 0;
+out:
+ ftl_restore_complete(restore, status);
+}
+
+static void
+ftl_restore_head_cb(struct ftl_io *io, void *ctx, int status)
+{
+ struct ftl_restore_band *rband = ctx;
+ struct ftl_restore *restore = rband->parent;
+ unsigned int num_ios;
+
+ rband->md_status = status;
+ num_ios = __atomic_fetch_sub(&restore->num_ios, 1, __ATOMIC_SEQ_CST);
+ assert(num_ios > 0);
+
+ if (num_ios == 1) {
+ ftl_restore_head_complete(restore);
+ }
+}
+
+static void
+ftl_restore_head_md(void *ctx)
+{
+ struct ftl_restore *restore = ctx;
+ struct spdk_ftl_dev *dev = restore->dev;
+ struct ftl_restore_band *rband;
+ struct ftl_lba_map *lba_map;
+ unsigned int num_failed = 0, num_ios;
+ size_t i;
+
+ restore->num_ios = ftl_get_num_bands(dev);
+
+ for (i = 0; i < ftl_get_num_bands(dev); ++i) {
+ rband = &restore->bands[i];
+ lba_map = &rband->band->lba_map;
+
+ lba_map->dma_buf = restore->md_buf + i * ftl_head_md_num_blocks(dev) * FTL_BLOCK_SIZE;
+
+ if (ftl_band_read_head_md(rband->band, ftl_restore_head_cb, rband)) {
+ if (spdk_likely(rband->band->num_zones)) {
+ SPDK_ERRLOG("Failed to read metadata on band %zu\n", i);
+
+ rband->md_status = FTL_MD_INVALID_CRC;
+
+ /* If the first IO fails, don't bother sending anything else */
+ if (i == 0) {
+ ftl_restore_complete(restore, -EIO);
+ }
+ }
+
+ num_failed++;
+ }
+ }
+
+ if (spdk_unlikely(num_failed > 0)) {
+ num_ios = __atomic_fetch_sub(&restore->num_ios, num_failed, __ATOMIC_SEQ_CST);
+ if (num_ios == num_failed) {
+ ftl_restore_complete(restore, -EIO);
+ }
+ }
+}
+
+int
+ftl_restore_md(struct spdk_ftl_dev *dev, ftl_restore_fn cb, void *cb_arg)
+{
+ struct ftl_restore *restore;
+
+ restore = ftl_restore_init(dev, cb, cb_arg);
+ if (!restore) {
+ return -ENOMEM;
+ }
+
+ spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_restore_head_md, restore);
+
+ return 0;
+}
+
+static int
+ftl_restore_l2p(struct ftl_band *band)
+{
+ struct spdk_ftl_dev *dev = band->dev;
+ struct ftl_addr addr;
+ uint64_t lba;
+ size_t i;
+
+ for (i = 0; i < ftl_get_num_blocks_in_band(band->dev); ++i) {
+ if (!spdk_bit_array_get(band->lba_map.vld, i)) {
+ continue;
+ }
+
+ lba = band->lba_map.map[i];
+ if (lba >= dev->num_lbas) {
+ return -1;
+ }
+
+ addr = ftl_l2p_get(dev, lba);
+ if (!ftl_addr_invalid(addr)) {
+ ftl_invalidate_addr(dev, addr);
+ }
+
+ addr = ftl_band_addr_from_block_offset(band, i);
+
+ ftl_band_set_addr(band, lba, addr);
+ ftl_l2p_set(dev, lba, addr);
+ }
+
+ return 0;
+}
+
+static struct ftl_restore_band *
+ftl_restore_next_band(struct ftl_restore *restore)
+{
+ struct ftl_restore_band *rband;
+
+ for (; restore->current < ftl_get_num_bands(restore->dev); ++restore->current) {
+ rband = &restore->bands[restore->current];
+
+ if (spdk_likely(rband->band->num_zones) &&
+ rband->md_status == FTL_MD_SUCCESS) {
+ restore->current++;
+ return rband;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+ftl_nv_cache_restore_complete(struct ftl_nv_cache_restore *restore, int status)
+{
+ struct ftl_restore *ftl_restore = SPDK_CONTAINEROF(restore, struct ftl_restore, nv_cache);
+
+ restore->status = restore->status ? : status;
+ if (restore->num_outstanding == 0) {
+ ftl_restore_complete(ftl_restore, restore->status);
+ }
+}
+
+static void ftl_nv_cache_block_read_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
+
+static void
+ftl_nv_cache_restore_done(struct ftl_nv_cache_restore *restore, uint64_t current_addr)
+{
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+
+ pthread_spin_lock(&nv_cache->lock);
+ nv_cache->current_addr = current_addr;
+ nv_cache->ready = true;
+ pthread_spin_unlock(&nv_cache->lock);
+
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Enabling non-volatile cache (phase: %u, addr: %"
+ PRIu64")\n", nv_cache->phase, current_addr);
+
+ ftl_nv_cache_restore_complete(restore, 0);
+}
+
+static void
+ftl_nv_cache_write_header_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_nv_cache_restore *restore = cb_arg;
+
+ spdk_bdev_free_io(bdev_io);
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Unable to write the non-volatile cache metadata header\n");
+ ftl_nv_cache_restore_complete(restore, -EIO);
+ return;
+ }
+
+ ftl_nv_cache_restore_done(restore, FTL_NV_CACHE_DATA_OFFSET);
+}
+
+static void
+ftl_nv_cache_scrub_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_nv_cache_restore *restore = cb_arg;
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+ int rc;
+
+ spdk_bdev_free_io(bdev_io);
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Scrubbing non-volatile cache failed\n");
+ ftl_nv_cache_restore_complete(restore, -EIO);
+ return;
+ }
+
+ nv_cache->phase = 1;
+ rc = ftl_nv_cache_write_header(nv_cache, false, ftl_nv_cache_write_header_cb, restore);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to write the non-volatile cache metadata header: %s\n",
+ spdk_strerror(-rc));
+ ftl_nv_cache_restore_complete(restore, -EIO);
+ }
+}
+
+static void
+ftl_nv_cache_scrub_header_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_nv_cache_restore *restore = cb_arg;
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+ int rc;
+
+ spdk_bdev_free_io(bdev_io);
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Unable to write non-volatile cache metadata header\n");
+ ftl_nv_cache_restore_complete(restore, -EIO);
+ return;
+ }
+
+ rc = ftl_nv_cache_scrub(nv_cache, ftl_nv_cache_scrub_cb, restore);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to scrub the non-volatile cache: %s\n", spdk_strerror(-rc));
+ ftl_nv_cache_restore_complete(restore, rc);
+ }
+}
+
+static void
+ftl_nv_cache_band_flush_cb(void *ctx, int status)
+{
+ struct ftl_nv_cache_restore *restore = ctx;
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+ int rc;
+
+ if (spdk_unlikely(status != 0)) {
+ SPDK_ERRLOG("Flushing active bands failed: %s\n", spdk_strerror(-status));
+ ftl_nv_cache_restore_complete(restore, status);
+ return;
+ }
+
+ /*
+ * Use phase 0 to indicate that the cache is being scrubbed. If the power is lost during
+ * this process, we'll know it needs to be resumed.
+ */
+ nv_cache->phase = 0;
+ rc = ftl_nv_cache_write_header(nv_cache, false, ftl_nv_cache_scrub_header_cb, restore);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to write non-volatile cache metadata header: %s\n",
+ spdk_strerror(-rc));
+ ftl_nv_cache_restore_complete(restore, rc);
+ }
+}
+
+static void
+ftl_nv_cache_wbuf_flush_cb(void *ctx, int status)
+{
+ struct ftl_nv_cache_restore *restore = ctx;
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+ struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
+ int rc;
+
+ if (spdk_unlikely(status != 0)) {
+ SPDK_ERRLOG("Flushing the write buffer failed: %s\n", spdk_strerror(-status));
+ ftl_nv_cache_restore_complete(restore, status);
+ return;
+ }
+
+ rc = ftl_flush_active_bands(dev, ftl_nv_cache_band_flush_cb, restore);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to flush active bands: %s\n", spdk_strerror(-rc));
+ ftl_nv_cache_restore_complete(restore, rc);
+ }
+}
+
+static void
+ftl_nv_cache_recovery_done(struct ftl_nv_cache_restore *restore)
+{
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+ struct ftl_nv_cache_range *range_prev, *range_current;
+ struct spdk_ftl_dev *dev = SPDK_CONTAINEROF(nv_cache, struct spdk_ftl_dev, nv_cache);
+ struct spdk_bdev *bdev;
+ uint64_t current_addr;
+ int rc;
+
+ range_prev = &restore->range[ftl_nv_cache_prev_phase(nv_cache->phase)];
+ range_current = &restore->range[nv_cache->phase];
+ bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
+
+ /*
+	 * If more than two phases are present or the two newest ranges overlap, scrub the
+	 * non-volatile cache to make sure that any subsequent power loss will find it in a
+	 * usable state.
+ */
+ if ((range_prev->num_blocks + range_current->num_blocks < nv_cache->num_data_blocks) ||
+ (range_prev->start_addr < range_current->last_addr &&
+ range_current->start_addr < range_prev->last_addr)) {
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Non-volatile cache inconsistency detected\n");
+
+ rc = ftl_flush_wbuf(dev, ftl_nv_cache_wbuf_flush_cb, restore);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to flush the write buffer: %s\n", spdk_strerror(-rc));
+ ftl_nv_cache_restore_complete(restore, rc);
+ }
+
+ return;
+ }
+
+	/* The latest phase is the one written in the header (set in nv_cache->phase) */
+ current_addr = range_current->last_addr + 1;
+
+ /*
+	 * The first range might be empty (only the header was written), or the range might
+	 * end at the last available address; in either case the current address wraps back
+	 * to the beginning of the cache's data region.
+ */
+ if (range_current->num_blocks == 0 || current_addr >= spdk_bdev_get_num_blocks(bdev)) {
+ current_addr = FTL_NV_CACHE_DATA_OFFSET;
+ }
+
+ ftl_nv_cache_restore_done(restore, current_addr);
+}
+
+static void
+ftl_nv_cache_recover_block(struct ftl_nv_cache_block *block)
+{
+ struct ftl_nv_cache_restore *restore = block->parent;
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+ struct ftl_nv_cache_range *range = &restore->range[restore->phase];
+ int rc;
+
+ assert(range->current_addr <= range->last_addr);
+
+ restore->num_outstanding++;
+ block->offset = range->current_addr++;
+ rc = spdk_bdev_read_blocks_with_md(nv_cache->bdev_desc, restore->ioch,
+ block->buf, block->md_buf,
+ block->offset, 1, ftl_nv_cache_block_read_cb,
+ block);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Non-volatile cache restoration failed on block %"PRIu64" (%s)\n",
+ block->offset, spdk_strerror(-rc));
+ restore->num_outstanding--;
+ ftl_nv_cache_restore_complete(restore, rc);
+ }
+}
+
+static void
+ftl_nv_cache_recover_range(struct ftl_nv_cache_restore *restore)
+{
+ struct ftl_nv_cache_range *range;
+ unsigned int phase = restore->phase;
+
+ do {
+ /* Find first range with non-zero number of blocks that is marked for recovery */
+ range = &restore->range[phase];
+ if (range->recovery && range->num_recovered < range->num_blocks) {
+ break;
+ }
+
+ phase = ftl_nv_cache_next_phase(phase);
+ } while (phase != restore->phase);
+
+	/* There are no ranges to be recovered; we're done */
+ if (range->num_recovered == range->num_blocks || !range->recovery) {
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Non-volatile cache recovery done\n");
+ ftl_nv_cache_recovery_done(restore);
+ return;
+ }
+
+ range->current_addr = range->start_addr;
+ restore->phase = phase;
+
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Recovering range %u %"PRIu64"-%"PRIu64" (%"PRIu64")\n",
+ phase, range->start_addr, range->last_addr, range->num_blocks);
+
+ ftl_nv_cache_recover_block(&restore->block[0]);
+}
+
+static void
+ftl_nv_cache_write_cb(struct ftl_io *io, void *cb_arg, int status)
+{
+ struct ftl_nv_cache_block *block = cb_arg;
+ struct ftl_nv_cache_restore *restore = block->parent;
+ struct ftl_nv_cache_range *range = &restore->range[restore->phase];
+
+ restore->num_outstanding--;
+ if (status != 0) {
+ SPDK_ERRLOG("Non-volatile cache restoration failed on block %"PRIu64" (%s)\n",
+ block->offset, spdk_strerror(-status));
+ ftl_nv_cache_restore_complete(restore, -ENOMEM);
+ return;
+ }
+
+ range->num_recovered++;
+ if (range->current_addr <= range->last_addr) {
+ ftl_nv_cache_recover_block(block);
+ } else if (restore->num_outstanding == 0) {
+ assert(range->num_recovered == range->num_blocks);
+ ftl_nv_cache_recover_range(restore);
+ }
+}
+
+static struct ftl_io *
+ftl_nv_cache_alloc_io(struct ftl_nv_cache_block *block, uint64_t lba)
+{
+ struct ftl_restore *restore = SPDK_CONTAINEROF(block->parent, struct ftl_restore, nv_cache);
+ struct ftl_io_init_opts opts = {
+ .dev = restore->dev,
+ .io = NULL,
+ .flags = FTL_IO_BYPASS_CACHE,
+ .type = FTL_IO_WRITE,
+ .num_blocks = 1,
+ .cb_fn = ftl_nv_cache_write_cb,
+ .cb_ctx = block,
+ .iovs = {
+ {
+ .iov_base = block->buf,
+ .iov_len = FTL_BLOCK_SIZE,
+ }
+ },
+ .iovcnt = 1,
+ };
+ struct ftl_io *io;
+
+ io = ftl_io_init_internal(&opts);
+ if (spdk_unlikely(!io)) {
+ return NULL;
+ }
+
+ io->lba.single = lba;
+ return io;
+}
+
+static void
+ftl_nv_cache_block_read_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_nv_cache_block *block = cb_arg;
+ struct ftl_nv_cache_restore *restore = block->parent;
+ struct ftl_nv_cache_range *range = &restore->range[restore->phase];
+ struct ftl_io *io;
+ unsigned int phase;
+ uint64_t lba;
+
+ spdk_bdev_free_io(bdev_io);
+ restore->num_outstanding--;
+
+ if (!success) {
+ SPDK_ERRLOG("Non-volatile cache restoration failed on block %"PRIu64"\n",
+ block->offset);
+ ftl_nv_cache_restore_complete(restore, -EIO);
+ return;
+ }
+
+ ftl_nv_cache_unpack_lba(*(uint64_t *)block->md_buf, &lba, &phase);
+ if (spdk_unlikely(phase != restore->phase)) {
+ if (range->current_addr < range->last_addr) {
+ ftl_nv_cache_recover_block(block);
+ } else if (restore->num_outstanding == 0) {
+ ftl_nv_cache_recover_range(restore);
+ }
+
+ return;
+ }
+
+ io = ftl_nv_cache_alloc_io(block, lba);
+ if (spdk_unlikely(!io)) {
+ SPDK_ERRLOG("Failed to allocate ftl_io during non-volatile cache recovery\n");
+ ftl_nv_cache_restore_complete(restore, -ENOMEM);
+ return;
+ }
+
+ restore->num_outstanding++;
+ ftl_io_write(io);
+}
+
+/*
+ * Since we have no control over the order in which the requests complete relative to their
+ * submission, the cache can be in any of the following states:
+ * - [1 1 1 1 1 1 1 1 1 1]: simplest case, the whole cache contains a single phase (although this
+ *			     should be very rare),
+ * - [1 1 1 1 3 3 3 3 3 3]: two phases, changing somewhere in the middle with no overlap. This is
+ *			     the state left by a clean shutdown,
+ * - [1 1 1 1 3 1 3 3 3 3]: similar to the above, but this time the two ranges overlap. This
+ *			     happens when completions are reordered during an unsafe shutdown,
+ * - [2 1 2 1 1 1 1 3 1 3]: three different phases, each of which can overlap with the
+ *			     previous/next one. The data from the oldest phase doesn't need to be
+ *			     recovered, since its blocks have already started being overwritten,
+ *			     which means their contents are already on the main storage.
+ */
+static void
+ftl_nv_cache_scan_done(struct ftl_nv_cache_restore *restore)
+{
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+#if defined(DEBUG)
+ struct ftl_nv_cache_range *range;
+ uint64_t i, num_blocks = 0;
+
+ for (i = 0; i < FTL_NV_CACHE_PHASE_COUNT; ++i) {
+ range = &restore->range[i];
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Range %"PRIu64": %"PRIu64"-%"PRIu64" (%" PRIu64
+ ")\n", i, range->start_addr, range->last_addr, range->num_blocks);
+ num_blocks += range->num_blocks;
+ }
+ assert(num_blocks == nv_cache->num_data_blocks);
+#endif
+ restore->phase = ftl_nv_cache_prev_phase(nv_cache->phase);
+
+ /*
+	 * Only the two latest phases need to be recovered. The third one, even if present,
+	 * must already be stored on the main storage, as it has already started being
+	 * overwritten (it is only present here because of the reordering of request completions).
+ */
+ restore->range[nv_cache->phase].recovery = true;
+ restore->range[restore->phase].recovery = true;
+
+ ftl_nv_cache_recover_range(restore);
+}
+
+static int ftl_nv_cache_scan_block(struct ftl_nv_cache_block *block);
+
+static void
+ftl_nv_cache_scan_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_nv_cache_block *block = cb_arg;
+ struct ftl_nv_cache_restore *restore = block->parent;
+ struct ftl_nv_cache_range *range;
+ struct spdk_bdev *bdev;
+ unsigned int phase;
+ uint64_t lba;
+
+ restore->num_outstanding--;
+ bdev = spdk_bdev_desc_get_bdev(restore->nv_cache->bdev_desc);
+ spdk_bdev_free_io(bdev_io);
+
+ if (!success) {
+ SPDK_ERRLOG("Non-volatile cache scan failed on block %"PRIu64"\n",
+ block->offset);
+ ftl_nv_cache_restore_complete(restore, -EIO);
+ return;
+ }
+
+ /* If we've already hit an error, don't bother with scanning anything else */
+ if (spdk_unlikely(restore->status != 0)) {
+ ftl_nv_cache_restore_complete(restore, restore->status);
+ return;
+ }
+
+ ftl_nv_cache_unpack_lba(*(uint64_t *)block->md_buf, &lba, &phase);
+ range = &restore->range[phase];
+ range->num_blocks++;
+
+ if (range->start_addr == FTL_LBA_INVALID || range->start_addr > block->offset) {
+ range->start_addr = block->offset;
+ }
+
+ if (range->last_addr == FTL_LBA_INVALID || range->last_addr < block->offset) {
+ range->last_addr = block->offset;
+ }
+
+	/* All the blocks have been read; once they all complete, we're finished */
+ if (restore->current_addr == spdk_bdev_get_num_blocks(bdev)) {
+ if (restore->num_outstanding == 0) {
+ ftl_nv_cache_scan_done(restore);
+ }
+
+ return;
+ }
+
+ ftl_nv_cache_scan_block(block);
+}
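
The scan callback above does nothing more than widen a per-phase address range for every block it reads back. Purely as an illustration of that bookkeeping (not part of the driver), the following standalone sketch applies the same min/max/count accumulation to the third example layout from the comment preceding ftl_nv_cache_scan_done(); the ex_* names are invented for the example.

#include <stdint.h>
#include <stdio.h>

#define EX_PHASE_COUNT	4
#define EX_ADDR_INVALID	UINT64_MAX

struct ex_range {
	uint64_t	start_addr;
	uint64_t	last_addr;
	uint64_t	num_blocks;
};

/* Widen the range of each phase to cover every block offset tagged with it,
 * mirroring what ftl_nv_cache_scan_cb() does one block at a time. */
static void
ex_scan(const unsigned int *phase, uint64_t num_blocks, struct ex_range *range)
{
	uint64_t offset;

	for (offset = 0; offset < num_blocks; ++offset) {
		struct ex_range *r = &range[phase[offset]];

		r->num_blocks++;
		if (r->start_addr == EX_ADDR_INVALID || r->start_addr > offset) {
			r->start_addr = offset;
		}
		if (r->last_addr == EX_ADDR_INVALID || r->last_addr < offset) {
			r->last_addr = offset;
		}
	}
}

int
main(void)
{
	/* The "[1 1 1 1 3 1 3 3 3 3]" layout: two phases overlapping around blocks 4-5 */
	const unsigned int phase[] = { 1, 1, 1, 1, 3, 1, 3, 3, 3, 3 };
	struct ex_range range[EX_PHASE_COUNT];
	unsigned int i;

	for (i = 0; i < EX_PHASE_COUNT; ++i) {
		range[i].start_addr = EX_ADDR_INVALID;
		range[i].last_addr = EX_ADDR_INVALID;
		range[i].num_blocks = 0;
	}

	ex_scan(phase, sizeof(phase) / sizeof(phase[0]), range);

	for (i = 0; i < EX_PHASE_COUNT; ++i) {
		if (range[i].num_blocks != 0) {
			printf("phase %u: %ju-%ju (%ju blocks)\n", i,
			       (uintmax_t)range[i].start_addr,
			       (uintmax_t)range[i].last_addr,
			       (uintmax_t)range[i].num_blocks);
		}
	}

	/* Prints "phase 1: 0-5 (5 blocks)" and "phase 3: 4-9 (5 blocks)": the two
	 * ranges overlap, which is what ftl_nv_cache_recovery_done() checks for. */
	return 0;
}
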
+
+static int
+ftl_nv_cache_scan_block(struct ftl_nv_cache_block *block)
+{
+ struct ftl_nv_cache_restore *restore = block->parent;
+ struct ftl_nv_cache *nv_cache = restore->nv_cache;
+ int rc;
+
+ restore->num_outstanding++;
+ block->offset = restore->current_addr++;
+ rc = spdk_bdev_read_blocks_with_md(nv_cache->bdev_desc, restore->ioch,
+ block->buf, block->md_buf,
+ block->offset, 1, ftl_nv_cache_scan_cb,
+ block);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Non-volatile cache scan failed on block %"PRIu64" (%s)\n",
+ block->offset, spdk_strerror(-rc));
+ restore->num_outstanding--;
+ ftl_nv_cache_restore_complete(restore, rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void
+ftl_nv_cache_clean_header_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_nv_cache_restore *restore = cb_arg;
+
+ spdk_bdev_free_io(bdev_io);
+ if (spdk_unlikely(!success)) {
+ SPDK_ERRLOG("Unable to write the non-volatile cache metadata header\n");
+ ftl_nv_cache_restore_complete(restore, -EIO);
+ return;
+ }
+
+ ftl_nv_cache_restore_done(restore, restore->current_addr);
+}
+
+static bool
+ftl_nv_cache_header_valid(struct spdk_ftl_dev *dev, const struct ftl_nv_cache_header *hdr)
+{
+ struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(dev->nv_cache.bdev_desc);
+ uint32_t checksum;
+
+ checksum = spdk_crc32c_update(hdr, offsetof(struct ftl_nv_cache_header, checksum), 0);
+ if (checksum != hdr->checksum) {
+ SPDK_ERRLOG("Invalid header checksum (found: %"PRIu32", expected: %"PRIu32")\n",
+ checksum, hdr->checksum);
+ return false;
+ }
+
+ if (hdr->version != FTL_NV_CACHE_HEADER_VERSION) {
+ SPDK_ERRLOG("Invalid header version (found: %"PRIu32", expected: %"PRIu32")\n",
+ hdr->version, FTL_NV_CACHE_HEADER_VERSION);
+ return false;
+ }
+
+ if (hdr->size != spdk_bdev_get_num_blocks(bdev)) {
+ SPDK_ERRLOG("Unexpected size of the non-volatile cache bdev (%"PRIu64", expected: %"
+ PRIu64")\n", hdr->size, spdk_bdev_get_num_blocks(bdev));
+ return false;
+ }
+
+ if (spdk_uuid_compare(&hdr->uuid, &dev->uuid)) {
+ SPDK_ERRLOG("Invalid device UUID\n");
+ return false;
+ }
+
+ if (!ftl_nv_cache_phase_is_valid(hdr->phase) && hdr->phase != 0) {
+ return false;
+ }
+
+ if ((hdr->current_addr >= spdk_bdev_get_num_blocks(bdev) ||
+ hdr->current_addr < FTL_NV_CACHE_DATA_OFFSET) &&
+ (hdr->current_addr != FTL_LBA_INVALID)) {
+ SPDK_ERRLOG("Unexpected value of non-volatile cache's current address: %"PRIu64"\n",
+ hdr->current_addr);
+ return false;
+ }
+
+ return true;
+}
+
+static void
+ftl_nv_cache_read_header_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
+{
+ struct ftl_restore *restore = cb_arg;
+ struct spdk_ftl_dev *dev = restore->dev;
+ struct ftl_nv_cache *nv_cache = &dev->nv_cache;
+ struct ftl_nv_cache_header *hdr;
+ struct iovec *iov = NULL;
+ int iov_cnt = 0, i, rc;
+
+ if (!success) {
+ SPDK_ERRLOG("Unable to read non-volatile cache metadata header\n");
+ ftl_restore_complete(restore, -ENOTRECOVERABLE);
+ goto out;
+ }
+
+ spdk_bdev_io_get_iovec(bdev_io, &iov, &iov_cnt);
+ assert(iov != NULL);
+ hdr = iov[0].iov_base;
+
+ if (!ftl_nv_cache_header_valid(dev, hdr)) {
+ ftl_restore_complete(restore, -ENOTRECOVERABLE);
+ goto out;
+ }
+
+ /* Remember the latest phase */
+ nv_cache->phase = hdr->phase;
+
+ /* If the phase equals zero, we lost power during recovery. We need to finish it up
+ * by scrubbing the device once again.
+ */
+ if (hdr->phase == 0) {
+ SPDK_DEBUGLOG(SPDK_LOG_FTL_INIT, "Detected phase 0, restarting scrub\n");
+ rc = ftl_nv_cache_scrub(nv_cache, ftl_nv_cache_scrub_cb, restore);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Unable to scrub the non-volatile cache: %s\n",
+ spdk_strerror(-rc));
+ ftl_restore_complete(restore, -ENOTRECOVERABLE);
+ }
+
+ goto out;
+ }
+
+ /* Valid current_addr means that the shutdown was clean, so we just need to overwrite the
+ * header to make sure that any power loss occurring before the cache is wrapped won't be
+ * mistaken for a clean shutdown.
+ */
+ if (hdr->current_addr != FTL_LBA_INVALID) {
+ restore->nv_cache.current_addr = hdr->current_addr;
+
+ rc = ftl_nv_cache_write_header(nv_cache, false, ftl_nv_cache_clean_header_cb,
+ &restore->nv_cache);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Failed to overwrite the non-volatile cache header: %s\n",
+ spdk_strerror(-rc));
+ ftl_restore_complete(restore, -ENOTRECOVERABLE);
+ }
+
+ goto out;
+ }
+
+ /* Otherwise the shutdown was unexpected, so we need to recover the data from the cache */
+ restore->nv_cache.current_addr = FTL_NV_CACHE_DATA_OFFSET;
+
+ for (i = 0; i < FTL_NV_CACHE_RESTORE_DEPTH; ++i) {
+ if (ftl_nv_cache_scan_block(&restore->nv_cache.block[i])) {
+ break;
+ }
+ }
+out:
+ spdk_bdev_free_io(bdev_io);
+}
+
+void
+ftl_restore_nv_cache(struct ftl_restore *restore, ftl_restore_fn cb, void *cb_arg)
+{
+ struct spdk_ftl_dev *dev = restore->dev;
+ struct spdk_bdev *bdev;
+ struct ftl_nv_cache *nv_cache = &dev->nv_cache;
+ struct ftl_io_channel *ioch;
+ struct ftl_nv_cache_restore *nvc_restore = &restore->nv_cache;
+ struct ftl_nv_cache_block *block;
+ size_t alignment;
+ int rc, i;
+
+ ioch = ftl_io_channel_get_ctx(ftl_get_io_channel(dev));
+ bdev = spdk_bdev_desc_get_bdev(nv_cache->bdev_desc);
+ alignment = spdk_max(spdk_bdev_get_buf_align(bdev), sizeof(uint64_t));
+
+ nvc_restore->nv_cache = nv_cache;
+ nvc_restore->ioch = ioch->cache_ioch;
+
+ restore->final_phase = true;
+ restore->cb = cb;
+ restore->cb_arg = cb_arg;
+
+ for (i = 0; i < FTL_NV_CACHE_RESTORE_DEPTH; ++i) {
+ block = &nvc_restore->block[i];
+ block->parent = nvc_restore;
+ block->buf = spdk_dma_zmalloc(spdk_bdev_get_block_size(bdev) +
+ spdk_bdev_get_md_size(bdev),
+ alignment, NULL);
+ if (!block->buf) {
+ /* The memory will be freed in ftl_restore_free */
+ SPDK_ERRLOG("Unable to allocate memory\n");
+ ftl_restore_complete(restore, -ENOMEM);
+ return;
+ }
+
+ block->md_buf = (char *)block->buf + spdk_bdev_get_block_size(bdev);
+ }
+
+ for (i = 0; i < FTL_NV_CACHE_PHASE_COUNT; ++i) {
+ nvc_restore->range[i].parent = nvc_restore;
+ nvc_restore->range[i].start_addr = FTL_LBA_INVALID;
+ nvc_restore->range[i].last_addr = FTL_LBA_INVALID;
+ nvc_restore->range[i].num_blocks = 0;
+ nvc_restore->range[i].recovery = false;
+ nvc_restore->range[i].phase = i;
+ }
+
+ rc = spdk_bdev_read_blocks(nv_cache->bdev_desc, ioch->cache_ioch, nv_cache->dma_buf,
+ 0, FTL_NV_CACHE_DATA_OFFSET, ftl_nv_cache_read_header_cb, restore);
+ if (spdk_unlikely(rc != 0)) {
+ SPDK_ERRLOG("Failed to read non-volatile cache metadata header: %s\n",
+ spdk_strerror(-rc));
+ ftl_restore_complete(restore, rc);
+ }
+}
+
+static bool
+ftl_pad_zone_pad_finish(struct ftl_restore_band *rband, bool direct_access)
+{
+ struct ftl_restore *restore = rband->parent;
+ struct ftl_restore_band *next_band;
+ size_t i, num_pad_zones = 0;
+
+ if (spdk_unlikely(restore->pad_status && !restore->num_ios)) {
+ if (direct_access) {
+			/*
+			 * If any errors were found, clear direct access. Direct access bands
+			 * have their own allocated metadata, which would otherwise be lost
+			 * when the restore completes.
+			 */
+ rband->band->state = FTL_BAND_STATE_CLOSED;
+ ftl_band_set_direct_access(rband->band, false);
+ }
+ ftl_restore_complete(restore, restore->pad_status);
+ return true;
+ }
+
+ for (i = 0; i < rband->band->num_zones; ++i) {
+ if (rband->band->zone_buf[i].info.state != SPDK_BDEV_ZONE_STATE_FULL) {
+ num_pad_zones++;
+ }
+ }
+
+ /* Finished all zones in a band, check if all bands are done */
+ if (num_pad_zones == 0) {
+ if (direct_access) {
+ rband->band->state = FTL_BAND_STATE_CLOSED;
+ ftl_band_set_direct_access(rband->band, false);
+ }
+
+ next_band = STAILQ_NEXT(rband, stailq);
+ if (!next_band) {
+ ftl_restore_complete(restore, restore->pad_status);
+ return true;
+ } else {
+ /* Start off padding in the next band */
+ ftl_restore_pad_band(next_band);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static struct ftl_io *
+ftl_restore_init_pad_io(struct ftl_restore_band *rband, void *buffer,
+ struct ftl_addr addr)
+{
+ struct ftl_band *band = rband->band;
+ struct spdk_ftl_dev *dev = band->dev;
+ int flags = FTL_IO_PAD | FTL_IO_INTERNAL | FTL_IO_PHYSICAL_MODE | FTL_IO_MD |
+ FTL_IO_DIRECT_ACCESS;
+ struct ftl_io_init_opts opts = {
+ .dev = dev,
+ .io = NULL,
+ .band = band,
+ .size = sizeof(struct ftl_io),
+ .flags = flags,
+ .type = FTL_IO_WRITE,
+ .num_blocks = dev->xfer_size,
+ .cb_fn = ftl_pad_zone_cb,
+ .cb_ctx = rband,
+ .iovs = {
+ {
+ .iov_base = buffer,
+ .iov_len = dev->xfer_size * FTL_BLOCK_SIZE,
+ }
+ },
+ .iovcnt = 1,
+ .parent = NULL,
+ };
+ struct ftl_io *io;
+
+ io = ftl_io_init_internal(&opts);
+ if (spdk_unlikely(!io)) {
+ return NULL;
+ }
+
+ io->addr = addr;
+ rband->parent->num_ios++;
+
+ return io;
+}
+
+static void
+ftl_pad_zone_cb(struct ftl_io *io, void *arg, int status)
+{
+ struct ftl_restore_band *rband = arg;
+ struct ftl_restore *restore = rband->parent;
+ struct ftl_band *band = io->band;
+ struct ftl_zone *zone;
+ struct ftl_io *new_io;
+ uint64_t offset;
+
+ restore->num_ios--;
+ /* TODO check for next unit error vs early close error */
+ if (status) {
+ restore->pad_status = status;
+ goto end;
+ }
+
+ offset = io->addr.offset % ftl_get_num_blocks_in_zone(restore->dev);
+ if (offset + io->num_blocks == ftl_get_num_blocks_in_zone(restore->dev)) {
+ zone = ftl_band_zone_from_addr(band, io->addr);
+ zone->info.state = SPDK_BDEV_ZONE_STATE_FULL;
+ } else {
+ struct ftl_addr addr = io->addr;
+ addr.offset += io->num_blocks;
+ new_io = ftl_restore_init_pad_io(rband, io->iov[0].iov_base, addr);
+ if (spdk_unlikely(!new_io)) {
+ restore->pad_status = -ENOMEM;
+ goto end;
+ }
+
+ ftl_io_write(new_io);
+ return;
+ }
+
+end:
+ spdk_dma_free(io->iov[0].iov_base);
+ ftl_pad_zone_pad_finish(rband, true);
+}
+
+static void
+ftl_restore_pad_band(struct ftl_restore_band *rband)
+{
+ struct ftl_restore *restore = rband->parent;
+ struct ftl_band *band = rband->band;
+ struct spdk_ftl_dev *dev = band->dev;
+ void *buffer = NULL;
+ struct ftl_io *io;
+ struct ftl_addr addr;
+ size_t i;
+ int rc = 0;
+
+ /* Check if some zones are not closed */
+ if (ftl_pad_zone_pad_finish(rband, false)) {
+ /*
+		 * If we're here, the end metadata wasn't recognized, but the whole band is
+		 * written. Assume the band was padded and ignore it.
+ */
+ return;
+ }
+
+ band->state = FTL_BAND_STATE_OPEN;
+ rc = ftl_band_set_direct_access(band, true);
+ if (rc) {
+ ftl_restore_complete(restore, rc);
+ return;
+ }
+
+ for (i = 0; i < band->num_zones; ++i) {
+ if (band->zone_buf[i].info.state == SPDK_BDEV_ZONE_STATE_FULL) {
+ continue;
+ }
+
+ addr.offset = band->zone_buf[i].info.write_pointer;
+
+ buffer = spdk_dma_zmalloc(FTL_BLOCK_SIZE * dev->xfer_size, 0, NULL);
+ if (spdk_unlikely(!buffer)) {
+ rc = -ENOMEM;
+ goto error;
+ }
+
+ io = ftl_restore_init_pad_io(rband, buffer, addr);
+ if (spdk_unlikely(!io)) {
+ rc = -ENOMEM;
+ spdk_dma_free(buffer);
+ goto error;
+ }
+
+ ftl_io_write(io);
+ }
+
+ return;
+
+error:
+ restore->pad_status = rc;
+ ftl_pad_zone_pad_finish(rband, true);
+}
+
+static void
+ftl_restore_pad_open_bands(void *ctx)
+{
+ struct ftl_restore *restore = ctx;
+
+ ftl_restore_pad_band(STAILQ_FIRST(&restore->pad_bands));
+}
+
+static void
+ftl_restore_tail_md_cb(struct ftl_io *io, void *ctx, int status)
+{
+ struct ftl_restore_band *rband = ctx;
+ struct ftl_restore *restore = rband->parent;
+ struct spdk_ftl_dev *dev = restore->dev;
+
+ if (status) {
+ if (!dev->conf.allow_open_bands) {
+ SPDK_ERRLOG("%s while restoring tail md in band %u.\n",
+ spdk_strerror(-status), rband->band->id);
+ ftl_band_release_lba_map(rband->band);
+ ftl_restore_complete(restore, status);
+ return;
+ } else {
+ SPDK_ERRLOG("%s while restoring tail md. Will attempt to pad band %u.\n",
+ spdk_strerror(-status), rband->band->id);
+ STAILQ_INSERT_TAIL(&restore->pad_bands, rband, stailq);
+ }
+ }
+
+ if (!status && ftl_restore_l2p(rband->band)) {
+ ftl_band_release_lba_map(rband->band);
+ ftl_restore_complete(restore, -ENOTRECOVERABLE);
+ return;
+ }
+ ftl_band_release_lba_map(rband->band);
+
+ rband = ftl_restore_next_band(restore);
+ if (!rband) {
+ if (!STAILQ_EMPTY(&restore->pad_bands)) {
+ spdk_thread_send_msg(ftl_get_core_thread(dev), ftl_restore_pad_open_bands,
+ restore);
+ } else {
+ ftl_restore_complete(restore, 0);
+ }
+
+ return;
+ }
+
+ ftl_restore_tail_md(rband);
+}
+
+static int
+ftl_restore_tail_md(struct ftl_restore_band *rband)
+{
+ struct ftl_restore *restore = rband->parent;
+ struct ftl_band *band = rband->band;
+
+ if (ftl_band_alloc_lba_map(band)) {
+ SPDK_ERRLOG("Failed to allocate lba map\n");
+ ftl_restore_complete(restore, -ENOMEM);
+ return -ENOMEM;
+ }
+
+ if (ftl_band_read_tail_md(band, band->tail_md_addr, ftl_restore_tail_md_cb, rband)) {
+ SPDK_ERRLOG("Failed to send tail metadata read\n");
+ ftl_restore_complete(restore, -EIO);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int
+ftl_restore_device(struct ftl_restore *restore, ftl_restore_fn cb, void *cb_arg)
+{
+ struct spdk_ftl_dev *dev = restore->dev;
+ struct ftl_restore_band *rband;
+
+ restore->current = 0;
+ restore->cb = cb;
+ restore->cb_arg = cb_arg;
+ restore->final_phase = dev->nv_cache.bdev_desc == NULL;
+
+ /* If restore_device is called, there must be at least one valid band */
+ rband = ftl_restore_next_band(restore);
+ assert(rband);
+ return ftl_restore_tail_md(rband);
+}
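
Taken together, the entry points in this file form a chain: ftl_restore_md() reads every band's head metadata on the core thread, its callback can then start ftl_restore_device() to replay tail metadata and rebuild the L2P, and, when a cache bdev is attached, ftl_restore_nv_cache() runs as the final phase. The sketch below shows one plausible way a caller could wire that chain up; the example_* names are invented, error handling is elided, the callback signature is the (restore, status, cb_arg) triple implied by ftl_restore_complete(), and the prototypes are assumed to be visible via ftl_core.h (in the actual driver this wiring is done during device initialization in ftl_init.c).

#include "ftl_core.h"	/* assumed to provide the ftl_restore_* prototypes and struct spdk_ftl_dev */

static void
example_nv_cache_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	/* Final phase: the restore context frees itself after this callback */
}

static void
example_device_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	struct spdk_ftl_dev *dev = cb_arg;

	if (status == 0 && dev->nv_cache.bdev_desc != NULL) {
		/* final_phase was false, so the restore context is still valid here */
		ftl_restore_nv_cache(restore, example_nv_cache_cb, dev);
	}
}

static void
example_md_cb(struct ftl_restore *restore, int status, void *cb_arg)
{
	if (status == 0) {
		/* Head metadata is consistent: replay bands in sequence-number order */
		ftl_restore_device(restore, example_device_cb, cb_arg);
	}
}

static int
example_start_restore(struct spdk_ftl_dev *dev)
{
	/* Kicks off the head metadata reads on the device's core thread */
	return ftl_restore_md(dev, example_md_cb, dev);
}
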
diff --git a/src/spdk/lib/ftl/ftl_trace.c b/src/spdk/lib/ftl/ftl_trace.c
new file mode 100644
index 000000000..ba66323ad
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_trace.c
@@ -0,0 +1,361 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/trace.h"
+
+#include "ftl_core.h"
+#include "ftl_trace.h"
+#include "ftl_io.h"
+#include "ftl_band.h"
+
+#if defined(DEBUG)
+
+#define OWNER_FTL 0x20
+#define TRACE_GROUP_FTL 0x6
+
+enum ftl_trace_source {
+ FTL_TRACE_SOURCE_INTERNAL,
+ FTL_TRACE_SOURCE_USER,
+ FTL_TRACE_SOURCE_MAX,
+};
+
+#define FTL_TPOINT_ID(id, src) SPDK_TPOINT_ID(TRACE_GROUP_FTL, (((id) << 1) | (!!(src))))
+
+#define FTL_TRACE_BAND_DEFRAG(src) FTL_TPOINT_ID(0, src)
+#define FTL_TRACE_BAND_WRITE(src) FTL_TPOINT_ID(1, src)
+#define FTL_TRACE_LIMITS(src) FTL_TPOINT_ID(2, src)
+#define FTL_TRACE_WBUF_POP(src) FTL_TPOINT_ID(3, src)
+
+#define FTL_TRACE_READ_SCHEDULE(src) FTL_TPOINT_ID(4, src)
+#define FTL_TRACE_READ_SUBMISSION(src) FTL_TPOINT_ID(5, src)
+#define FTL_TRACE_READ_COMPLETION_INVALID(src) FTL_TPOINT_ID(6, src)
+#define FTL_TRACE_READ_COMPLETION_CACHE(src) FTL_TPOINT_ID(7, src)
+#define FTL_TRACE_READ_COMPLETION_DISK(src) FTL_TPOINT_ID(8, src)
+
+#define FTL_TRACE_MD_READ_SCHEDULE(src) FTL_TPOINT_ID(9, src)
+#define FTL_TRACE_MD_READ_SUBMISSION(src) FTL_TPOINT_ID(10, src)
+#define FTL_TRACE_MD_READ_COMPLETION(src) FTL_TPOINT_ID(11, src)
+
+#define FTL_TRACE_WRITE_SCHEDULE(src) FTL_TPOINT_ID(12, src)
+#define FTL_TRACE_WRITE_WBUF_FILL(src) FTL_TPOINT_ID(13, src)
+#define FTL_TRACE_WRITE_SUBMISSION(src) FTL_TPOINT_ID(14, src)
+#define FTL_TRACE_WRITE_COMPLETION(src) FTL_TPOINT_ID(15, src)
+
+#define FTL_TRACE_MD_WRITE_SCHEDULE(src) FTL_TPOINT_ID(16, src)
+#define FTL_TRACE_MD_WRITE_SUBMISSION(src) FTL_TPOINT_ID(17, src)
+#define FTL_TRACE_MD_WRITE_COMPLETION(src) FTL_TPOINT_ID(18, src)
+
+#define FTL_TRACE_ERASE_SUBMISSION(src) FTL_TPOINT_ID(19, src)
+#define FTL_TRACE_ERASE_COMPLETION(src) FTL_TPOINT_ID(20, src)
+
+SPDK_TRACE_REGISTER_FN(ftl_trace_func, "ftl", TRACE_GROUP_FTL)
+{
+ const char source[] = { 'i', 'u' };
+ char descbuf[128];
+ int i;
+
+ spdk_trace_register_owner(OWNER_FTL, 'f');
+
+ for (i = 0; i < FTL_TRACE_SOURCE_MAX; ++i) {
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "band_defrag");
+ spdk_trace_register_description(descbuf, FTL_TRACE_BAND_DEFRAG(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "band: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "band_write");
+ spdk_trace_register_description(descbuf, FTL_TRACE_BAND_WRITE(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "band: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "limits");
+ spdk_trace_register_description(descbuf, FTL_TRACE_LIMITS(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "limits: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "rwb_pop");
+ spdk_trace_register_description(descbuf, FTL_TRACE_WBUF_POP(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "md_read_sched");
+ spdk_trace_register_description(descbuf, FTL_TRACE_MD_READ_SCHEDULE(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "addr: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "md_read_submit");
+ spdk_trace_register_description(descbuf, FTL_TRACE_MD_READ_SUBMISSION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "addr: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "md_read_cmpl");
+ spdk_trace_register_description(descbuf, FTL_TRACE_MD_READ_COMPLETION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "md_write_sched");
+ spdk_trace_register_description(descbuf, FTL_TRACE_MD_WRITE_SCHEDULE(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "addr: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "md_write_submit");
+ spdk_trace_register_description(descbuf, FTL_TRACE_MD_WRITE_SUBMISSION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "addr: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "md_write_cmpl");
+ spdk_trace_register_description(descbuf, FTL_TRACE_MD_WRITE_COMPLETION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "read_sched");
+ spdk_trace_register_description(descbuf, FTL_TRACE_READ_SCHEDULE(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "read_submit");
+ spdk_trace_register_description(descbuf, FTL_TRACE_READ_SUBMISSION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "addr: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "read_cmpl_invld");
+ spdk_trace_register_description(descbuf, FTL_TRACE_READ_COMPLETION_INVALID(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "read_cmpl_cache");
+ spdk_trace_register_description(descbuf, FTL_TRACE_READ_COMPLETION_CACHE(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "read_cmpl_ssd");
+ spdk_trace_register_description(descbuf, FTL_TRACE_READ_COMPLETION_DISK(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "write_sched");
+ spdk_trace_register_description(descbuf, FTL_TRACE_WRITE_SCHEDULE(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "rwb_fill");
+ spdk_trace_register_description(descbuf, FTL_TRACE_WRITE_WBUF_FILL(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "write_submit");
+ spdk_trace_register_description(descbuf, FTL_TRACE_WRITE_SUBMISSION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "addr: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "write_cmpl");
+ spdk_trace_register_description(descbuf, FTL_TRACE_WRITE_COMPLETION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "lba: ");
+
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "erase_submit");
+ spdk_trace_register_description(descbuf, FTL_TRACE_ERASE_SUBMISSION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "addr: ");
+ snprintf(descbuf, sizeof(descbuf), "%c %s", source[i], "erase_cmpl");
+ spdk_trace_register_description(descbuf, FTL_TRACE_ERASE_COMPLETION(i),
+ OWNER_FTL, OBJECT_NONE, 0, 0, "addr: ");
+ }
+}
+
+static uint16_t
+ftl_trace_io_source(const struct ftl_io *io)
+{
+ if (io->flags & FTL_IO_INTERNAL) {
+ return FTL_TRACE_SOURCE_INTERNAL;
+ } else {
+ return FTL_TRACE_SOURCE_USER;
+ }
+}
+
+static uint64_t
+ftl_trace_next_id(struct ftl_trace *trace)
+{
+ assert(trace->id != FTL_TRACE_INVALID_ID);
+ return __atomic_fetch_add(&trace->id, 1, __ATOMIC_SEQ_CST);
+}
+
+void
+ftl_trace_defrag_band(struct spdk_ftl_dev *dev, const struct ftl_band *band)
+{
+ struct ftl_trace *trace = &dev->stats.trace;
+
+ spdk_trace_record(FTL_TRACE_BAND_DEFRAG(FTL_TRACE_SOURCE_INTERNAL),
+ ftl_trace_next_id(trace), 0, band->lba_map.num_vld, band->id);
+}
+
+void
+ftl_trace_write_band(struct spdk_ftl_dev *dev, const struct ftl_band *band)
+{
+ struct ftl_trace *trace = &dev->stats.trace;
+
+ spdk_trace_record(FTL_TRACE_BAND_WRITE(FTL_TRACE_SOURCE_INTERNAL),
+ ftl_trace_next_id(trace), 0, 0, band->id);
+}
+
+void
+ftl_trace_lba_io_init(struct spdk_ftl_dev *dev, const struct ftl_io *io)
+{
+ uint16_t tpoint_id = 0, source;
+
+ assert(io->trace != FTL_TRACE_INVALID_ID);
+ source = ftl_trace_io_source(io);
+
+ if (io->flags & FTL_IO_MD) {
+ switch (io->type) {
+ case FTL_IO_READ:
+ tpoint_id = FTL_TRACE_MD_READ_SCHEDULE(source);
+ break;
+ case FTL_IO_WRITE:
+ tpoint_id = FTL_TRACE_MD_WRITE_SCHEDULE(source);
+ break;
+ default:
+ assert(0);
+ }
+ } else {
+ switch (io->type) {
+ case FTL_IO_READ:
+ tpoint_id = FTL_TRACE_READ_SCHEDULE(source);
+ break;
+ case FTL_IO_WRITE:
+ tpoint_id = FTL_TRACE_WRITE_SCHEDULE(source);
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ spdk_trace_record(tpoint_id, io->trace, io->num_blocks, 0, ftl_io_get_lba(io, 0));
+}
+
+void
+ftl_trace_wbuf_fill(struct spdk_ftl_dev *dev, const struct ftl_io *io)
+{
+ assert(io->trace != FTL_TRACE_INVALID_ID);
+
+ spdk_trace_record(FTL_TRACE_WRITE_WBUF_FILL(ftl_trace_io_source(io)), io->trace,
+ 0, 0, ftl_io_current_lba(io));
+}
+
+void
+ftl_trace_wbuf_pop(struct spdk_ftl_dev *dev, const struct ftl_wbuf_entry *entry)
+{
+ uint16_t tpoint_id;
+
+ assert(entry->trace != FTL_TRACE_INVALID_ID);
+
+ if (entry->io_flags & FTL_IO_INTERNAL) {
+ tpoint_id = FTL_TRACE_WBUF_POP(FTL_TRACE_SOURCE_INTERNAL);
+ } else {
+ tpoint_id = FTL_TRACE_WBUF_POP(FTL_TRACE_SOURCE_USER);
+ }
+
+ spdk_trace_record(tpoint_id, entry->trace, 0, entry->addr.offset, entry->lba);
+}
+
+void
+ftl_trace_completion(struct spdk_ftl_dev *dev, const struct ftl_io *io,
+ enum ftl_trace_completion completion)
+{
+ uint16_t tpoint_id = 0, source;
+
+ assert(io->trace != FTL_TRACE_INVALID_ID);
+ source = ftl_trace_io_source(io);
+
+ if (io->flags & FTL_IO_MD) {
+ switch (io->type) {
+ case FTL_IO_READ:
+ tpoint_id = FTL_TRACE_MD_READ_COMPLETION(source);
+ break;
+ case FTL_IO_WRITE:
+ tpoint_id = FTL_TRACE_MD_WRITE_COMPLETION(source);
+ break;
+ default:
+ assert(0);
+ }
+ } else {
+ switch (io->type) {
+ case FTL_IO_READ:
+ switch (completion) {
+ case FTL_TRACE_COMPLETION_INVALID:
+ tpoint_id = FTL_TRACE_READ_COMPLETION_INVALID(source);
+ break;
+ case FTL_TRACE_COMPLETION_CACHE:
+ tpoint_id = FTL_TRACE_READ_COMPLETION_CACHE(source);
+ break;
+ case FTL_TRACE_COMPLETION_DISK:
+ tpoint_id = FTL_TRACE_READ_COMPLETION_DISK(source);
+ break;
+ }
+ break;
+ case FTL_IO_WRITE:
+ tpoint_id = FTL_TRACE_WRITE_COMPLETION(source);
+ break;
+ case FTL_IO_ERASE:
+ tpoint_id = FTL_TRACE_ERASE_COMPLETION(source);
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ spdk_trace_record(tpoint_id, io->trace, 0, 0, ftl_io_get_lba(io, io->pos - 1));
+}
+
+void
+ftl_trace_submission(struct spdk_ftl_dev *dev, const struct ftl_io *io, struct ftl_addr addr,
+ size_t addr_cnt)
+{
+ uint16_t tpoint_id = 0, source;
+
+ assert(io->trace != FTL_TRACE_INVALID_ID);
+ source = ftl_trace_io_source(io);
+
+ if (io->flags & FTL_IO_MD) {
+ switch (io->type) {
+ case FTL_IO_READ:
+ tpoint_id = FTL_TRACE_MD_READ_SUBMISSION(source);
+ break;
+ case FTL_IO_WRITE:
+ tpoint_id = FTL_TRACE_MD_WRITE_SUBMISSION(source);
+ break;
+ default:
+ assert(0);
+ }
+ } else {
+ switch (io->type) {
+ case FTL_IO_READ:
+ tpoint_id = FTL_TRACE_READ_SUBMISSION(source);
+ break;
+ case FTL_IO_WRITE:
+ tpoint_id = FTL_TRACE_WRITE_SUBMISSION(source);
+ break;
+ case FTL_IO_ERASE:
+ tpoint_id = FTL_TRACE_ERASE_SUBMISSION(source);
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ spdk_trace_record(tpoint_id, io->trace, addr_cnt, 0, addr.offset);
+}
+
+void
+ftl_trace_limits(struct spdk_ftl_dev *dev, int limit, size_t num_free)
+{
+ struct ftl_trace *trace = &dev->stats.trace;
+
+ spdk_trace_record(FTL_TRACE_LIMITS(FTL_TRACE_SOURCE_INTERNAL), ftl_trace_next_id(trace),
+ num_free, limit, 0);
+}
+
+uint64_t
+ftl_trace_alloc_id(struct spdk_ftl_dev *dev)
+{
+ struct ftl_trace *trace = &dev->stats.trace;
+
+ return ftl_trace_next_id(trace);
+}
+
+#endif /* defined(DEBUG) */
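
A small aside on the ID scheme above: FTL_TPOINT_ID() folds the event number and the source into a single tracepoint slot, using bit 0 for the source, before SPDK_TPOINT_ID() places it in the FTL trace group. A minimal sketch of that packing (EX_PACK is a stand-in name and the group offset is left out):

#include <assert.h>

/* Same packing as ((id) << 1) | (!!(src)) in FTL_TPOINT_ID() */
#define EX_PACK(id, src)	(((id) << 1) | (!!(src)))

static_assert(EX_PACK(4, 1) == 9, "read_sched issued by user I/O");
static_assert(EX_PACK(4, 0) == 8, "read_sched issued by internal I/O");
static_assert((EX_PACK(4, 1) >> 1) == 4 && (EX_PACK(4, 1) & 1) == 1,
	      "decoding just reverses the shift and masks bit 0");

This is why every logical event (read_sched, write_submit, and so on) registers two descriptions in ftl_trace_func(): one for internal traffic and one for user traffic, distinguishable in the trace output by that low bit.
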
diff --git a/src/spdk/lib/ftl/ftl_trace.h b/src/spdk/lib/ftl/ftl_trace.h
new file mode 100644
index 000000000..52988cff6
--- /dev/null
+++ b/src/spdk/lib/ftl/ftl_trace.h
@@ -0,0 +1,84 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FTL_TRACE_H
+#define FTL_TRACE_H
+
+#include "ftl_addr.h"
+
+#define FTL_TRACE_INVALID_ID ((uint64_t) -1)
+
+enum ftl_trace_completion {
+ FTL_TRACE_COMPLETION_INVALID,
+ FTL_TRACE_COMPLETION_CACHE,
+ FTL_TRACE_COMPLETION_DISK,
+};
+
+struct ftl_trace {
+ /* Monotonically incrementing event id */
+ uint64_t id;
+};
+
+struct spdk_ftl_dev;
+struct ftl_trace;
+struct ftl_io;
+struct ftl_wbuf_entry;
+struct ftl_band;
+
+#if defined(DEBUG)
+uint64_t ftl_trace_alloc_id(struct spdk_ftl_dev *dev);
+void ftl_trace_defrag_band(struct spdk_ftl_dev *dev, const struct ftl_band *band);
+void ftl_trace_write_band(struct spdk_ftl_dev *dev, const struct ftl_band *band);
+void ftl_trace_lba_io_init(struct spdk_ftl_dev *dev, const struct ftl_io *io);
+void ftl_trace_wbuf_fill(struct spdk_ftl_dev *dev, const struct ftl_io *io);
+void ftl_trace_wbuf_pop(struct spdk_ftl_dev *dev, const struct ftl_wbuf_entry *entry);
+void ftl_trace_submission(struct spdk_ftl_dev *dev,
+ const struct ftl_io *io,
+ struct ftl_addr addr, size_t addr_cnt);
+void ftl_trace_completion(struct spdk_ftl_dev *dev,
+ const struct ftl_io *io,
+ enum ftl_trace_completion type);
+void ftl_trace_limits(struct spdk_ftl_dev *dev, int limit, size_t num_free);
+#else /* defined(DEBUG) */
+#define ftl_trace_alloc_id(dev) FTL_TRACE_INVALID_ID
+#define ftl_trace_defrag_band(dev, band)
+#define ftl_trace_write_band(dev, band)
+#define ftl_trace_lba_io_init(dev, io)
+#define ftl_trace_wbuf_fill(dev, io)
+#define ftl_trace_wbuf_pop(dev, entry)
+#define ftl_trace_submission(dev, io, addr, addr_cnt)
+#define ftl_trace_completion(dev, io, type)
+#define ftl_trace_limits(dev, limits, num_free)
+#endif
+
+#endif /* FTL_TRACE_H */
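
Because the non-DEBUG branch above stubs every call out, call sites never need their own #ifdefs. A hypothetical call site (example_submit is invented for illustration and assumes the ftl_io.h/ftl_trace.h declarations are in scope) compiles in both configurations:

static void
example_submit(struct spdk_ftl_dev *dev, struct ftl_io *io, struct ftl_addr addr)
{
	io->trace = ftl_trace_alloc_id(dev);	/* FTL_TRACE_INVALID_ID in non-DEBUG builds */
	ftl_trace_submission(dev, io, addr, 1);	/* expands to an empty statement without DEBUG */
}
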
diff --git a/src/spdk/lib/ftl/spdk_ftl.map b/src/spdk/lib/ftl/spdk_ftl.map
new file mode 100644
index 000000000..141fd01e0
--- /dev/null
+++ b/src/spdk/lib/ftl/spdk_ftl.map
@@ -0,0 +1,14 @@
+{
+ global:
+
+ # public functions
+ spdk_ftl_dev_init;
+ spdk_ftl_dev_free;
+ spdk_ftl_conf_init_defaults;
+ spdk_ftl_dev_get_attrs;
+ spdk_ftl_read;
+ spdk_ftl_write;
+ spdk_ftl_flush;
+
+ local: *;
+};