diff options
Diffstat (limited to '')
21 files changed, 7987 insertions, 0 deletions
diff --git a/src/spdk/dpdk/drivers/baseband/Makefile b/src/spdk/dpdk/drivers/baseband/Makefile new file mode 100644 index 000000000..dcc096917 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +core-libs := librte_eal librte_mbuf librte_mempool librte_ring +core-libs += librte_bbdev librte_kvargs librte_cfgfile + +DIRS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_NULL) += null +DEPDIRS-null = $(core-libs) +DIRS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += turbo_sw +DEPDIRS-turbo_sw = $(core-libs) +DIRS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC) += fpga_lte_fec +DEPDIRS-fpga_lte_fec = $(core-libs) +DIRS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC) += fpga_5gnr_fec +DEPDIRS-fpga_5gnr_fec = $(core-libs) + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/Makefile b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/Makefile new file mode 100644 index 000000000..7b7017c6d --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/Makefile @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2019 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_pmd_bbdev_fpga_5gnr_fec.a + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_bbdev +LDLIBS += -lrte_pci -lrte_bus_pci + +# versioning export map +EXPORT_MAP := rte_pmd_bbdev_fpga_5gnr_fec_version.map + +# library version +LIBABIVER := 1 + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC) += rte_fpga_5gnr_fec.c + +# export include files +SYMLINK-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_FPGA_5GNR_FEC)-include += rte_pmd_fpga_5gnr_fec.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/fpga_5gnr_fec.h b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/fpga_5gnr_fec.h 
new file mode 100644 index 000000000..e72c95e93 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/fpga_5gnr_fec.h @@ -0,0 +1,388 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#ifndef _FPGA_5GNR_FEC_H_ +#define _FPGA_5GNR_FEC_H_ + +#include <stdint.h> +#include <stdbool.h> + +/* Helper macro for logging */ +#define rte_bbdev_log(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, fpga_5gnr_fec_logtype, fmt "\n", \ + ##__VA_ARGS__) + +#ifdef RTE_LIBRTE_BBDEV_DEBUG +#define rte_bbdev_log_debug(fmt, ...) \ + rte_bbdev_log(DEBUG, "fpga_5gnr_fec: " fmt, \ + ##__VA_ARGS__) +#else +#define rte_bbdev_log_debug(fmt, ...) +#endif + +/* FPGA 5GNR FEC driver names */ +#define FPGA_5GNR_FEC_PF_DRIVER_NAME intel_fpga_5gnr_fec_pf +#define FPGA_5GNR_FEC_VF_DRIVER_NAME intel_fpga_5gnr_fec_vf + +/* FPGA 5GNR FEC PCI vendor & device IDs */ +#define FPGA_5GNR_FEC_VENDOR_ID (0x8086) +#define FPGA_5GNR_FEC_PF_DEVICE_ID (0x0D8F) +#define FPGA_5GNR_FEC_VF_DEVICE_ID (0x0D90) + +/* Align DMA descriptors to 256 bytes - cache-aligned */ +#define FPGA_RING_DESC_ENTRY_LENGTH (8) +/* Ring size is in 256 bits (32 bytes) units */ +#define FPGA_RING_DESC_LEN_UNIT_BYTES (32) +/* Maximum size of queue */ +#define FPGA_RING_MAX_SIZE (1024) +#define FPGA_FLR_TIMEOUT_UNIT (16.384) + +#define FPGA_NUM_UL_QUEUES (32) +#define FPGA_NUM_DL_QUEUES (32) +#define FPGA_TOTAL_NUM_QUEUES (FPGA_NUM_UL_QUEUES + FPGA_NUM_DL_QUEUES) +#define FPGA_NUM_INTR_VEC (FPGA_TOTAL_NUM_QUEUES - RTE_INTR_VEC_RXTX_OFFSET) + +#define FPGA_INVALID_HW_QUEUE_ID (0xFFFFFFFF) + +#define FPGA_QUEUE_FLUSH_TIMEOUT_US (1000) +#define FPGA_HARQ_RDY_TIMEOUT (10) +#define FPGA_TIMEOUT_CHECK_INTERVAL (5) +#define FPGA_DDR_OVERFLOW (0x10) + +#define FPGA_5GNR_FEC_DDR_WR_DATA_LEN_IN_BYTES 8 +#define FPGA_5GNR_FEC_DDR_RD_DATA_LEN_IN_BYTES 8 + +/* Constants from K0 computation from 3GPP 38.212 Table 5.4.2.1-2 */ +#define N_ZC_1 66 /* N = 66 Zc for BG 1 */ +#define N_ZC_2 50 /* N = 50 Zc for 
BG 2 */ +#define K0_1_1 17 /* K0 fraction numerator for rv 1 and BG 1 */ +#define K0_1_2 13 /* K0 fraction numerator for rv 1 and BG 2 */ +#define K0_2_1 33 /* K0 fraction numerator for rv 2 and BG 1 */ +#define K0_2_2 25 /* K0 fraction numerator for rv 2 and BG 2 */ +#define K0_3_1 56 /* K0 fraction numerator for rv 3 and BG 1 */ +#define K0_3_2 43 /* K0 fraction numerator for rv 3 and BG 2 */ + +/* FPGA 5GNR FEC Register mapping on BAR0 */ +enum { + FPGA_5GNR_FEC_VERSION_ID = 0x00000000, /* len: 4B */ + FPGA_5GNR_FEC_CONFIGURATION = 0x00000004, /* len: 2B */ + FPGA_5GNR_FEC_QUEUE_PF_VF_MAP_DONE = 0x00000008, /* len: 1B */ + FPGA_5GNR_FEC_LOAD_BALANCE_FACTOR = 0x0000000a, /* len: 2B */ + FPGA_5GNR_FEC_RING_DESC_LEN = 0x0000000c, /* len: 2B */ + FPGA_5GNR_FEC_FLR_TIME_OUT = 0x0000000e, /* len: 2B */ + FPGA_5GNR_FEC_VFQ_FLUSH_STATUS_LW = 0x00000018, /* len: 4B */ + FPGA_5GNR_FEC_VFQ_FLUSH_STATUS_HI = 0x0000001c, /* len: 4B */ + FPGA_5GNR_FEC_QUEUE_MAP = 0x00000040, /* len: 256B */ + FPGA_5GNR_FEC_RING_CTRL_REGS = 0x00000200, /* len: 2048B */ + FPGA_5GNR_FEC_DDR4_WR_ADDR_REGS = 0x00000A00, /* len: 4B */ + FPGA_5GNR_FEC_DDR4_WR_DATA_REGS = 0x00000A08, /* len: 8B */ + FPGA_5GNR_FEC_DDR4_WR_DONE_REGS = 0x00000A10, /* len: 1B */ + FPGA_5GNR_FEC_DDR4_RD_ADDR_REGS = 0x00000A18, /* len: 4B */ + FPGA_5GNR_FEC_DDR4_RD_DONE_REGS = 0x00000A20, /* len: 1B */ + FPGA_5GNR_FEC_DDR4_RD_RDY_REGS = 0x00000A28, /* len: 1B */ + FPGA_5GNR_FEC_DDR4_RD_DATA_REGS = 0x00000A30, /* len: 8B */ + FPGA_5GNR_FEC_DDR4_ADDR_RDY_REGS = 0x00000A38, /* len: 1B */ + FPGA_5GNR_FEC_HARQ_BUF_SIZE_RDY_REGS = 0x00000A40, /* len: 1B */ + FPGA_5GNR_FEC_HARQ_BUF_SIZE_REGS = 0x00000A48 /* len: 4B */ +}; + +/* FPGA 5GNR FEC Ring Control Registers */ +enum { + FPGA_5GNR_FEC_RING_HEAD_ADDR = 0x00000008, + FPGA_5GNR_FEC_RING_SIZE = 0x00000010, + FPGA_5GNR_FEC_RING_MISC = 0x00000014, + FPGA_5GNR_FEC_RING_ENABLE = 0x00000015, + FPGA_5GNR_FEC_RING_FLUSH_QUEUE_EN = 0x00000016, + FPGA_5GNR_FEC_RING_SHADOW_TAIL = 
0x00000018, + FPGA_5GNR_FEC_RING_HEAD_POINT = 0x0000001C +}; + +/* FPGA 5GNR FEC DESCRIPTOR ERROR */ +enum { + DESC_ERR_NO_ERR = 0x0, + DESC_ERR_K_P_OUT_OF_RANGE = 0x1, + DESC_ERR_Z_C_NOT_LEGAL = 0x2, + DESC_ERR_DESC_OFFSET_ERR = 0x3, + DESC_ERR_DESC_READ_FAIL = 0x8, + DESC_ERR_DESC_READ_TIMEOUT = 0x9, + DESC_ERR_DESC_READ_TLP_POISONED = 0xA, + DESC_ERR_CB_READ_FAIL = 0xC, + DESC_ERR_CB_READ_TIMEOUT = 0xD, + DESC_ERR_CB_READ_TLP_POISONED = 0xE, + DESC_ERR_HBSTORE_ERR = 0xF +}; + + +/* FPGA 5GNR FEC DMA Encoding Request Descriptor */ +struct __rte_packed fpga_dma_enc_desc { + uint32_t done:1, + rsrvd0:7, + error:4, + rsrvd1:4, + num_null:10, + rsrvd2:6; + uint32_t ncb:15, + rsrvd3:1, + k0:16; + uint32_t irq_en:1, + crc_en:1, + rsrvd4:1, + qm_idx:3, + bg_idx:1, + zc:9, + desc_idx:10, + rsrvd5:6; + uint16_t rm_e; + uint16_t k_; + uint32_t out_addr_lw; + uint32_t out_addr_hi; + uint32_t in_addr_lw; + uint32_t in_addr_hi; + + union { + struct { + /* Virtual addresses used to retrieve SW context info */ + void *op_addr; + /* Stores information about total number of Code Blocks + * in currently processed Transport Block + */ + uint64_t cbs_in_op; + }; + + uint8_t sw_ctxt[FPGA_RING_DESC_LEN_UNIT_BYTES * + (FPGA_RING_DESC_ENTRY_LENGTH - 1)]; + }; +}; + + +/* FPGA 5GNR DPC FEC DMA Decoding Request Descriptor */ +struct __rte_packed fpga_dma_dec_desc { + uint32_t done:1, + iter:5, + et_pass:1, + crcb_pass:1, + error:4, + qm_idx:3, + max_iter:5, + bg_idx:1, + rsrvd0:1, + harqin_en:1, + zc:9; + uint32_t hbstroe_offset:22, + num_null:10; + uint32_t irq_en:1, + ncb:15, + desc_idx:10, + drop_crc24b:1, + crc24b_ind:1, + rv:2, + et_dis:1, + rsrvd2:1; + uint32_t harq_input_length:16, + rm_e:16;/*the inbound data byte length*/ + uint32_t out_addr_lw; + uint32_t out_addr_hi; + uint32_t in_addr_lw; + uint32_t in_addr_hi; + + union { + struct { + /* Virtual addresses used to retrieve SW context info */ + void *op_addr; + /* Stores information about total number of Code Blocks + * in 
currently processed Transport Block + */ + uint8_t cbs_in_op; + }; + + uint32_t sw_ctxt[8 * (FPGA_RING_DESC_ENTRY_LENGTH - 1)]; + }; +}; + +/* FPGA 5GNR DMA Descriptor */ +union fpga_dma_desc { + struct fpga_dma_enc_desc enc_req; + struct fpga_dma_dec_desc dec_req; +}; + +/* FPGA 5GNR FEC Ring Control Register */ +struct __rte_packed fpga_ring_ctrl_reg { + uint64_t ring_base_addr; + uint64_t ring_head_addr; + uint16_t ring_size:11; + uint16_t rsrvd0; + union { /* Miscellaneous register */ + uint8_t misc; + uint8_t max_ul_dec:5, + max_ul_dec_en:1, + rsrvd1:2; + }; + uint8_t enable; + uint8_t flush_queue_en; + uint8_t rsrvd2; + uint16_t shadow_tail; + uint16_t rsrvd3; + uint16_t head_point; + uint16_t rsrvd4; + +}; + +/* Private data structure for each FPGA FEC device */ +struct fpga_5gnr_fec_device { + /** Base address of MMIO registers (BAR0) */ + void *mmio_base; + /** Base address of memory for sw rings */ + void *sw_rings; + /** Physical address of sw_rings */ + rte_iova_t sw_rings_phys; + /** Number of bytes available for each queue in device. */ + uint32_t sw_ring_size; + /** Max number of entries available for each queue in device */ + uint32_t sw_ring_max_depth; + /** Base address of response tail pointer buffer */ + uint32_t *tail_ptrs; + /** Physical address of tail pointers */ + rte_iova_t tail_ptr_phys; + /** Queues flush completion flag */ + uint64_t *flush_queue_status; + /* Bitmap capturing which Queues are bound to the PF/VF */ + uint64_t q_bound_bit_map; + /* Bitmap capturing which Queues have already been assigned */ + uint64_t q_assigned_bit_map; + /** True if this is a PF FPGA FEC device */ + bool pf_device; +}; + +/* Structure associated with each queue. 
*/ +struct __rte_cache_aligned fpga_queue { + struct fpga_ring_ctrl_reg ring_ctrl_reg; /* Ring Control Register */ + union fpga_dma_desc *ring_addr; /* Virtual address of software ring */ + uint64_t *ring_head_addr; /* Virtual address of completion_head */ + uint64_t shadow_completion_head; /* Shadow completion head value */ + uint16_t head_free_desc; /* Ring head */ + uint16_t tail; /* Ring tail */ + /* Mask used to wrap enqueued descriptors on the sw ring */ + uint32_t sw_ring_wrap_mask; + uint32_t irq_enable; /* Enable ops dequeue interrupts if set to 1 */ + uint8_t q_idx; /* Queue index */ + struct fpga_5gnr_fec_device *d; + /* MMIO register of shadow_tail used to enqueue descriptors */ + void *shadow_tail_addr; +}; + +/* Write to 16 bit MMIO register address */ +static inline void +mmio_write_16(void *addr, uint16_t value) +{ + *((volatile uint16_t *)(addr)) = rte_cpu_to_le_16(value); +} + +/* Write to 32 bit MMIO register address */ +static inline void +mmio_write_32(void *addr, uint32_t value) +{ + *((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value); +} + +/* Write to 64 bit MMIO register address */ +static inline void +mmio_write_64(void *addr, uint64_t value) +{ + *((volatile uint64_t *)(addr)) = rte_cpu_to_le_64(value); +} + +/* Write a 8 bit register of a FPGA 5GNR FEC device */ +static inline void +fpga_reg_write_8(void *mmio_base, uint32_t offset, uint8_t payload) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + *((volatile uint8_t *)(reg_addr)) = payload; +} + +/* Write a 16 bit register of a FPGA 5GNR FEC device */ +static inline void +fpga_reg_write_16(void *mmio_base, uint32_t offset, uint16_t payload) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + mmio_write_16(reg_addr, payload); +} + +/* Write a 32 bit register of a FPGA 5GNR FEC device */ +static inline void +fpga_reg_write_32(void *mmio_base, uint32_t offset, uint32_t payload) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + mmio_write_32(reg_addr, payload); +} + 
+/* Write a 64 bit register of a FPGA 5GNR FEC device */ +static inline void +fpga_reg_write_64(void *mmio_base, uint32_t offset, uint64_t payload) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + mmio_write_64(reg_addr, payload); +} + +/* Write a ring control register of a FPGA 5GNR FEC device */ +static inline void +fpga_ring_reg_write(void *mmio_base, uint32_t offset, + struct fpga_ring_ctrl_reg payload) +{ + fpga_reg_write_64(mmio_base, offset, payload.ring_base_addr); + fpga_reg_write_64(mmio_base, offset + FPGA_5GNR_FEC_RING_HEAD_ADDR, + payload.ring_head_addr); + fpga_reg_write_16(mmio_base, offset + FPGA_5GNR_FEC_RING_SIZE, + payload.ring_size); + fpga_reg_write_16(mmio_base, offset + FPGA_5GNR_FEC_RING_HEAD_POINT, + payload.head_point); + fpga_reg_write_8(mmio_base, offset + FPGA_5GNR_FEC_RING_FLUSH_QUEUE_EN, + payload.flush_queue_en); + fpga_reg_write_16(mmio_base, offset + FPGA_5GNR_FEC_RING_SHADOW_TAIL, + payload.shadow_tail); + fpga_reg_write_8(mmio_base, offset + FPGA_5GNR_FEC_RING_MISC, + payload.misc); + fpga_reg_write_8(mmio_base, offset + FPGA_5GNR_FEC_RING_ENABLE, + payload.enable); +} + +/* Read a register of FPGA 5GNR FEC device */ +static inline uint32_t +fpga_reg_read_32(void *mmio_base, uint32_t offset) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + uint32_t ret = *((volatile uint32_t *)(reg_addr)); + return rte_le_to_cpu_32(ret); +} + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + +/* Read a register of FPGA 5GNR FEC device */ +static inline uint16_t +fpga_reg_read_16(void *mmio_base, uint32_t offset) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + uint16_t ret = *((volatile uint16_t *)(reg_addr)); + return rte_le_to_cpu_16(ret); +} + +#endif + +/* Read a register of FPGA 5GNR FEC device */ +static inline uint8_t +fpga_reg_read_8(void *mmio_base, uint32_t offset) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + return *((volatile uint8_t *)(reg_addr)); +} + +/* Read a register of FPGA 5GNR FEC device */ +static inline 
uint64_t +fpga_reg_read_64(void *mmio_base, uint32_t offset) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + uint64_t ret = *((volatile uint64_t *)(reg_addr)); + return rte_le_to_cpu_64(ret); +} + +#endif /* _FPGA_5GNR_FEC_H_ */ diff --git a/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/meson.build b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/meson.build new file mode 100644 index 000000000..9d10bcf80 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/meson.build @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2020 Intel Corporation + +deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci'] + +sources = files('rte_fpga_5gnr_fec.c') + +install_headers('rte_pmd_fpga_5gnr_fec.h') diff --git a/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c new file mode 100644 index 000000000..e152b206e --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_fpga_5gnr_fec.c @@ -0,0 +1,2187 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#include <unistd.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_dev.h> +#include <rte_malloc.h> +#include <rte_mempool.h> +#include <rte_errno.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_byteorder.h> +#ifdef RTE_BBDEV_OFFLOAD_COST +#include <rte_cycles.h> +#endif + +#include <rte_bbdev.h> +#include <rte_bbdev_pmd.h> + +#include "fpga_5gnr_fec.h" +#include "rte_pmd_fpga_5gnr_fec.h" + +/* 5GNR SW PMD logging ID */ +static int fpga_5gnr_fec_logtype; + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + +/* Read Ring Control Register of FPGA 5GNR FEC device */ +static inline void +print_ring_reg_debug_info(void *mmio_base, uint32_t offset) +{ + rte_bbdev_log_debug( + "FPGA MMIO base address @ %p | Ring Control Register @ offset = 0x%08" + PRIx32, mmio_base, offset); + rte_bbdev_log_debug( + "RING_BASE_ADDR = 0x%016"PRIx64, + 
fpga_reg_read_64(mmio_base, offset)); + rte_bbdev_log_debug( + "RING_HEAD_ADDR = 0x%016"PRIx64, + fpga_reg_read_64(mmio_base, offset + + FPGA_5GNR_FEC_RING_HEAD_ADDR)); + rte_bbdev_log_debug( + "RING_SIZE = 0x%04"PRIx16, + fpga_reg_read_16(mmio_base, offset + + FPGA_5GNR_FEC_RING_SIZE)); + rte_bbdev_log_debug( + "RING_MISC = 0x%02"PRIx8, + fpga_reg_read_8(mmio_base, offset + + FPGA_5GNR_FEC_RING_MISC)); + rte_bbdev_log_debug( + "RING_ENABLE = 0x%02"PRIx8, + fpga_reg_read_8(mmio_base, offset + + FPGA_5GNR_FEC_RING_ENABLE)); + rte_bbdev_log_debug( + "RING_FLUSH_QUEUE_EN = 0x%02"PRIx8, + fpga_reg_read_8(mmio_base, offset + + FPGA_5GNR_FEC_RING_FLUSH_QUEUE_EN)); + rte_bbdev_log_debug( + "RING_SHADOW_TAIL = 0x%04"PRIx16, + fpga_reg_read_16(mmio_base, offset + + FPGA_5GNR_FEC_RING_SHADOW_TAIL)); + rte_bbdev_log_debug( + "RING_HEAD_POINT = 0x%04"PRIx16, + fpga_reg_read_16(mmio_base, offset + + FPGA_5GNR_FEC_RING_HEAD_POINT)); +} + +/* Read Static Register of FPGA 5GNR FEC device */ +static inline void +print_static_reg_debug_info(void *mmio_base) +{ + uint16_t config = fpga_reg_read_16(mmio_base, + FPGA_5GNR_FEC_CONFIGURATION); + uint8_t qmap_done = fpga_reg_read_8(mmio_base, + FPGA_5GNR_FEC_QUEUE_PF_VF_MAP_DONE); + uint16_t lb_factor = fpga_reg_read_16(mmio_base, + FPGA_5GNR_FEC_LOAD_BALANCE_FACTOR); + uint16_t ring_desc_len = fpga_reg_read_16(mmio_base, + FPGA_5GNR_FEC_RING_DESC_LEN); + uint16_t flr_time_out = fpga_reg_read_16(mmio_base, + FPGA_5GNR_FEC_FLR_TIME_OUT); + + rte_bbdev_log_debug("UL.DL Weights = %u.%u", + ((uint8_t)config), ((uint8_t)(config >> 8))); + rte_bbdev_log_debug("UL.DL Load Balance = %u.%u", + ((uint8_t)lb_factor), ((uint8_t)(lb_factor >> 8))); + rte_bbdev_log_debug("Queue-PF/VF Mapping Table = %s", + (qmap_done > 0) ? 
"READY" : "NOT-READY"); + rte_bbdev_log_debug("Ring Descriptor Size = %u bytes", + ring_desc_len*FPGA_RING_DESC_LEN_UNIT_BYTES); + rte_bbdev_log_debug("FLR Timeout = %f usec", + (float)flr_time_out*FPGA_FLR_TIMEOUT_UNIT); +} + +/* Print decode DMA Descriptor of FPGA 5GNR Decoder device */ +static void +print_dma_dec_desc_debug_info(union fpga_dma_desc *desc) +{ + rte_bbdev_log_debug("DMA response desc %p\n" + "\t-- done(%"PRIu32") | iter(%"PRIu32") | et_pass(%"PRIu32")" + " | crcb_pass (%"PRIu32") | error(%"PRIu32")\n" + "\t-- qm_idx(%"PRIu32") | max_iter(%"PRIu32") | " + "bg_idx (%"PRIu32") | harqin_en(%"PRIu32") | zc(%"PRIu32")\n" + "\t-- hbstroe_offset(%"PRIu32") | num_null (%"PRIu32") " + "| irq_en(%"PRIu32")\n" + "\t-- ncb(%"PRIu32") | desc_idx (%"PRIu32") | " + "drop_crc24b(%"PRIu32") | RV (%"PRIu32")\n" + "\t-- crc24b_ind(%"PRIu32") | et_dis (%"PRIu32")\n" + "\t-- harq_input_length(%"PRIu32") | rm_e(%"PRIu32")\n" + "\t-- cbs_in_op(%"PRIu32") | in_add (0x%08"PRIx32"%08"PRIx32")" + "| out_add (0x%08"PRIx32"%08"PRIx32")", + desc, + (uint32_t)desc->dec_req.done, + (uint32_t)desc->dec_req.iter, + (uint32_t)desc->dec_req.et_pass, + (uint32_t)desc->dec_req.crcb_pass, + (uint32_t)desc->dec_req.error, + (uint32_t)desc->dec_req.qm_idx, + (uint32_t)desc->dec_req.max_iter, + (uint32_t)desc->dec_req.bg_idx, + (uint32_t)desc->dec_req.harqin_en, + (uint32_t)desc->dec_req.zc, + (uint32_t)desc->dec_req.hbstroe_offset, + (uint32_t)desc->dec_req.num_null, + (uint32_t)desc->dec_req.irq_en, + (uint32_t)desc->dec_req.ncb, + (uint32_t)desc->dec_req.desc_idx, + (uint32_t)desc->dec_req.drop_crc24b, + (uint32_t)desc->dec_req.rv, + (uint32_t)desc->dec_req.crc24b_ind, + (uint32_t)desc->dec_req.et_dis, + (uint32_t)desc->dec_req.harq_input_length, + (uint32_t)desc->dec_req.rm_e, + (uint32_t)desc->dec_req.cbs_in_op, + (uint32_t)desc->dec_req.in_addr_hi, + (uint32_t)desc->dec_req.in_addr_lw, + (uint32_t)desc->dec_req.out_addr_hi, + (uint32_t)desc->dec_req.out_addr_lw); + uint32_t *word = 
(uint32_t *) desc; + rte_bbdev_log_debug("%08"PRIx32"\n%08"PRIx32"\n%08"PRIx32"\n%08"PRIx32"\n" + "%08"PRIx32"\n%08"PRIx32"\n%08"PRIx32"\n%08"PRIx32"\n", + word[0], word[1], word[2], word[3], + word[4], word[5], word[6], word[7]); +} + +/* Print decode DMA Descriptor of FPGA 5GNR encoder device */ +static void +print_dma_enc_desc_debug_info(union fpga_dma_desc *desc) +{ + rte_bbdev_log_debug("DMA response desc %p\n" + "%"PRIu32" %"PRIu32"\n" + "K' %"PRIu32" E %"PRIu32" desc %"PRIu32" Z %"PRIu32"\n" + "BG %"PRIu32" Qm %"PRIu32" CRC %"PRIu32" IRQ %"PRIu32"\n" + "k0 %"PRIu32" Ncb %"PRIu32" F %"PRIu32"\n", + desc, + (uint32_t)desc->enc_req.done, + (uint32_t)desc->enc_req.error, + + (uint32_t)desc->enc_req.k_, + (uint32_t)desc->enc_req.rm_e, + (uint32_t)desc->enc_req.desc_idx, + (uint32_t)desc->enc_req.zc, + + (uint32_t)desc->enc_req.bg_idx, + (uint32_t)desc->enc_req.qm_idx, + (uint32_t)desc->enc_req.crc_en, + (uint32_t)desc->enc_req.irq_en, + + (uint32_t)desc->enc_req.k0, + (uint32_t)desc->enc_req.ncb, + (uint32_t)desc->enc_req.num_null); + uint32_t *word = (uint32_t *) desc; + rte_bbdev_log_debug("%08"PRIx32"\n%08"PRIx32"\n%08"PRIx32"\n%08"PRIx32"\n" + "%08"PRIx32"\n%08"PRIx32"\n%08"PRIx32"\n%08"PRIx32"\n", + word[0], word[1], word[2], word[3], + word[4], word[5], word[6], word[7]); +} + +#endif + +static int +fpga_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id) +{ + /* Number of queues bound to a PF/VF */ + uint32_t hw_q_num = 0; + uint32_t ring_size, payload, address, q_id, offset; + rte_iova_t phys_addr; + struct fpga_ring_ctrl_reg ring_reg; + struct fpga_5gnr_fec_device *fpga_dev = dev->data->dev_private; + + address = FPGA_5GNR_FEC_QUEUE_PF_VF_MAP_DONE; + if (!(fpga_reg_read_32(fpga_dev->mmio_base, address) & 0x1)) { + rte_bbdev_log(ERR, + "Queue-PF/VF mapping is not set! 
Was PF configured for device (%s) ?", + dev->data->name); + return -EPERM; + } + + /* Clear queue registers structure */ + memset(&ring_reg, 0, sizeof(struct fpga_ring_ctrl_reg)); + + /* Scan queue map. + * If a queue is valid and mapped to a calling PF/VF the read value is + * replaced with a queue ID and if it's not then + * FPGA_INVALID_HW_QUEUE_ID is returned. + */ + for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { + uint32_t hw_q_id = fpga_reg_read_32(fpga_dev->mmio_base, + FPGA_5GNR_FEC_QUEUE_MAP + (q_id << 2)); + + rte_bbdev_log_debug("%s: queue ID: %u, registry queue ID: %u", + dev->device->name, q_id, hw_q_id); + + if (hw_q_id != FPGA_INVALID_HW_QUEUE_ID) { + fpga_dev->q_bound_bit_map |= (1ULL << q_id); + /* Clear queue register of found queue */ + offset = FPGA_5GNR_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q_id); + fpga_ring_reg_write(fpga_dev->mmio_base, + offset, ring_reg); + ++hw_q_num; + } + } + if (hw_q_num == 0) { + rte_bbdev_log(ERR, + "No HW queues assigned to this device. Probably this is a VF configured for PF mode. Check device configuration!"); + return -ENODEV; + } + + if (num_queues > hw_q_num) { + rte_bbdev_log(ERR, + "Not enough queues for device %s! 
Requested: %u, available: %u", + dev->device->name, num_queues, hw_q_num); + return -EINVAL; + } + + ring_size = FPGA_RING_MAX_SIZE * sizeof(struct fpga_dma_dec_desc); + + /* Enforce 32 byte alignment */ + RTE_BUILD_BUG_ON((RTE_CACHE_LINE_SIZE % 32) != 0); + + /* Allocate memory for SW descriptor rings */ + fpga_dev->sw_rings = rte_zmalloc_socket(dev->device->driver->name, + num_queues * ring_size, RTE_CACHE_LINE_SIZE, + socket_id); + if (fpga_dev->sw_rings == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate memory for %s:%u sw_rings", + dev->device->driver->name, dev->data->dev_id); + return -ENOMEM; + } + + fpga_dev->sw_rings_phys = rte_malloc_virt2iova(fpga_dev->sw_rings); + fpga_dev->sw_ring_size = ring_size; + fpga_dev->sw_ring_max_depth = FPGA_RING_MAX_SIZE; + + /* Allocate memory for ring flush status */ + fpga_dev->flush_queue_status = rte_zmalloc_socket(NULL, + sizeof(uint64_t), RTE_CACHE_LINE_SIZE, socket_id); + if (fpga_dev->flush_queue_status == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate memory for %s:%u flush_queue_status", + dev->device->driver->name, dev->data->dev_id); + return -ENOMEM; + } + + /* Set the flush status address registers */ + phys_addr = rte_malloc_virt2iova(fpga_dev->flush_queue_status); + + address = FPGA_5GNR_FEC_VFQ_FLUSH_STATUS_LW; + payload = (uint32_t)(phys_addr); + fpga_reg_write_32(fpga_dev->mmio_base, address, payload); + + address = FPGA_5GNR_FEC_VFQ_FLUSH_STATUS_HI; + payload = (uint32_t)(phys_addr >> 32); + fpga_reg_write_32(fpga_dev->mmio_base, address, payload); + + return 0; +} + +static int +fpga_dev_close(struct rte_bbdev *dev) +{ + struct fpga_5gnr_fec_device *fpga_dev = dev->data->dev_private; + + rte_free(fpga_dev->sw_rings); + rte_free(fpga_dev->flush_queue_status); + + return 0; +} + +static void +fpga_dev_info_get(struct rte_bbdev *dev, + struct rte_bbdev_driver_info *dev_info) +{ + struct fpga_5gnr_fec_device *d = dev->data->dev_private; + uint32_t q_id = 0; + + static const struct rte_bbdev_op_cap 
bbdev_capabilities[] = { + { + .type = RTE_BBDEV_OP_LDPC_ENC, + .cap.ldpc_enc = { + .capability_flags = + RTE_BBDEV_LDPC_RATE_MATCH | + RTE_BBDEV_LDPC_ENC_INTERRUPTS | + RTE_BBDEV_LDPC_CRC_24B_ATTACH, + .num_buffers_src = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_dst = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + } + }, + { + .type = RTE_BBDEV_OP_LDPC_DEC, + .cap.ldpc_dec = { + .capability_flags = + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | + RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | + RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | + RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE | + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE | + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE | + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK | + RTE_BBDEV_LDPC_DEC_INTERRUPTS | + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_FILLERS, + .llr_size = 6, + .llr_decimals = 2, + .num_buffers_src = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_hard_out = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_soft_out = 0, + } + }, + RTE_BBDEV_END_OF_CAPABILITIES_LIST() + }; + + /* Check the HARQ DDR size available */ + uint8_t timeout_counter = 0; + uint32_t harq_buf_ready = fpga_reg_read_32(d->mmio_base, + FPGA_5GNR_FEC_HARQ_BUF_SIZE_RDY_REGS); + while (harq_buf_ready != 1) { + usleep(FPGA_TIMEOUT_CHECK_INTERVAL); + timeout_counter++; + harq_buf_ready = fpga_reg_read_32(d->mmio_base, + FPGA_5GNR_FEC_HARQ_BUF_SIZE_RDY_REGS); + if (timeout_counter > FPGA_HARQ_RDY_TIMEOUT) { + rte_bbdev_log(ERR, "HARQ Buffer not ready %d", + harq_buf_ready); + harq_buf_ready = 1; + } + } + uint32_t harq_buf_size = fpga_reg_read_32(d->mmio_base, + FPGA_5GNR_FEC_HARQ_BUF_SIZE_REGS); + + static struct rte_bbdev_queue_conf default_queue_conf; + default_queue_conf.socket = dev->data->socket_id; + default_queue_conf.queue_size = FPGA_RING_MAX_SIZE; + + dev_info->driver_name = dev->device->driver->name; + dev_info->queue_size_lim = FPGA_RING_MAX_SIZE; + dev_info->hardware_accelerated = true; + dev_info->min_alignment = 
64; + dev_info->harq_buffer_size = (harq_buf_size >> 10) + 1; + dev_info->default_queue_conf = default_queue_conf; + dev_info->capabilities = bbdev_capabilities; + dev_info->cpu_flag_reqs = NULL; + + /* Calculates number of queues assigned to device */ + dev_info->max_num_queues = 0; + for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { + uint32_t hw_q_id = fpga_reg_read_32(d->mmio_base, + FPGA_5GNR_FEC_QUEUE_MAP + (q_id << 2)); + if (hw_q_id != FPGA_INVALID_HW_QUEUE_ID) + dev_info->max_num_queues++; + } +} + +/** + * Find index of queue bound to current PF/VF which is unassigned. Return -1 + * when there is no available queue + */ +static inline int +fpga_find_free_queue_idx(struct rte_bbdev *dev, + const struct rte_bbdev_queue_conf *conf) +{ + struct fpga_5gnr_fec_device *d = dev->data->dev_private; + uint64_t q_idx; + uint8_t i = 0; + uint8_t range = FPGA_TOTAL_NUM_QUEUES >> 1; + + if (conf->op_type == RTE_BBDEV_OP_LDPC_ENC) { + i = FPGA_NUM_DL_QUEUES; + range = FPGA_TOTAL_NUM_QUEUES; + } + + for (; i < range; ++i) { + q_idx = 1ULL << i; + /* Check if index of queue is bound to current PF/VF */ + if (d->q_bound_bit_map & q_idx) + /* Check if found queue was not already assigned */ + if (!(d->q_assigned_bit_map & q_idx)) { + d->q_assigned_bit_map |= q_idx; + return i; + } + } + + rte_bbdev_log(INFO, "Failed to find free queue on %s", dev->data->name); + + return -1; +} + +static int +fpga_queue_setup(struct rte_bbdev *dev, uint16_t queue_id, + const struct rte_bbdev_queue_conf *conf) +{ + uint32_t address, ring_offset; + struct fpga_5gnr_fec_device *d = dev->data->dev_private; + struct fpga_queue *q; + int8_t q_idx; + + /* Check if there is a free queue to assign */ + q_idx = fpga_find_free_queue_idx(dev, conf); + if (q_idx == -1) + return -1; + + /* Allocate the queue data structure. 
*/ + q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q), + RTE_CACHE_LINE_SIZE, conf->socket); + if (q == NULL) { + /* Mark queue as un-assigned */ + d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); + rte_bbdev_log(ERR, "Failed to allocate queue memory"); + return -ENOMEM; + } + + q->d = d; + q->q_idx = q_idx; + + /* Set ring_base_addr */ + q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id)); + q->ring_ctrl_reg.ring_base_addr = d->sw_rings_phys + + (d->sw_ring_size * queue_id); + + /* Allocate memory for Completion Head variable*/ + q->ring_head_addr = rte_zmalloc_socket(dev->device->driver->name, + sizeof(uint64_t), RTE_CACHE_LINE_SIZE, conf->socket); + if (q->ring_head_addr == NULL) { + /* Mark queue as un-assigned */ + d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); + rte_free(q); + rte_bbdev_log(ERR, + "Failed to allocate memory for %s:%u completion_head", + dev->device->driver->name, dev->data->dev_id); + return -ENOMEM; + } + /* Set ring_head_addr */ + q->ring_ctrl_reg.ring_head_addr = + rte_malloc_virt2iova(q->ring_head_addr); + + /* Clear shadow_completion_head */ + q->shadow_completion_head = 0; + + /* Set ring_size */ + if (conf->queue_size > FPGA_RING_MAX_SIZE) { + /* Mark queue as un-assigned */ + d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); + rte_free(q->ring_head_addr); + rte_free(q); + rte_bbdev_log(ERR, + "Size of queue is too big %d (MAX: %d ) for %s:%u", + conf->queue_size, FPGA_RING_MAX_SIZE, + dev->device->driver->name, dev->data->dev_id); + return -EINVAL; + } + q->ring_ctrl_reg.ring_size = conf->queue_size; + + /* Set Miscellaneous FPGA register*/ + /* Max iteration number for TTI mitigation - todo */ + q->ring_ctrl_reg.max_ul_dec = 0; + /* Enable max iteration number for TTI - todo */ + q->ring_ctrl_reg.max_ul_dec_en = 0; + + /* Enable the ring */ + q->ring_ctrl_reg.enable = 1; + + /* Set FPGA head_point and tail registers */ + q->ring_ctrl_reg.head_point = q->tail = 0; + + /* Set FPGA 
shadow_tail register */ + q->ring_ctrl_reg.shadow_tail = q->tail; + + /* Calculates the ring offset for found queue */ + ring_offset = FPGA_5GNR_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q_idx); + + /* Set FPGA Ring Control Registers */ + fpga_ring_reg_write(d->mmio_base, ring_offset, q->ring_ctrl_reg); + + /* Store MMIO register of shadow_tail */ + address = ring_offset + FPGA_5GNR_FEC_RING_SHADOW_TAIL; + q->shadow_tail_addr = RTE_PTR_ADD(d->mmio_base, address); + + q->head_free_desc = q->tail; + + /* Set wrap mask */ + q->sw_ring_wrap_mask = conf->queue_size - 1; + + rte_bbdev_log_debug("Setup dev%u q%u: queue_idx=%u", + dev->data->dev_id, queue_id, q->q_idx); + + dev->data->queues[queue_id].queue_private = q; + + rte_bbdev_log_debug("BBDEV queue[%d] set up for FPGA queue[%d]", + queue_id, q_idx); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + /* Read FPGA Ring Control Registers after configuration*/ + print_ring_reg_debug_info(d->mmio_base, ring_offset); +#endif + return 0; +} + +static int +fpga_queue_release(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_5gnr_fec_device *d = dev->data->dev_private; + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + struct fpga_ring_ctrl_reg ring_reg; + uint32_t offset; + + rte_bbdev_log_debug("FPGA Queue[%d] released", queue_id); + + if (q != NULL) { + memset(&ring_reg, 0, sizeof(struct fpga_ring_ctrl_reg)); + offset = FPGA_5GNR_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); + /* Disable queue */ + fpga_reg_write_8(d->mmio_base, + offset + FPGA_5GNR_FEC_RING_ENABLE, 0x00); + /* Clear queue registers */ + fpga_ring_reg_write(d->mmio_base, offset, ring_reg); + + /* Mark the Queue as un-assigned */ + d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q->q_idx)); + rte_free(q->ring_head_addr); + rte_free(q); + dev->data->queues[queue_id].queue_private = NULL; + } + + return 0; +} + +/* Function starts a device queue. 
*/ +static int +fpga_queue_start(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_5gnr_fec_device *d = dev->data->dev_private; +#ifdef RTE_LIBRTE_BBDEV_DEBUG + if (d == NULL) { + rte_bbdev_log(ERR, "Invalid device pointer"); + return -1; + } +#endif + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + uint32_t offset = FPGA_5GNR_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); + uint8_t enable = 0x01; + uint16_t zero = 0x0000; + + /* Clear queue head and tail variables */ + q->tail = q->head_free_desc = 0; + + /* Clear FPGA head_point and tail registers */ + fpga_reg_write_16(d->mmio_base, offset + FPGA_5GNR_FEC_RING_HEAD_POINT, + zero); + fpga_reg_write_16(d->mmio_base, offset + FPGA_5GNR_FEC_RING_SHADOW_TAIL, + zero); + + /* Enable queue */ + fpga_reg_write_8(d->mmio_base, offset + FPGA_5GNR_FEC_RING_ENABLE, + enable); + + rte_bbdev_log_debug("FPGA Queue[%d] started", queue_id); + return 0; +} + +/* Function stops a device queue. */ +static int +fpga_queue_stop(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_5gnr_fec_device *d = dev->data->dev_private; +#ifdef RTE_LIBRTE_BBDEV_DEBUG + if (d == NULL) { + rte_bbdev_log(ERR, "Invalid device pointer"); + return -1; + } +#endif + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + uint32_t offset = FPGA_5GNR_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); + uint8_t payload = 0x01; + uint8_t counter = 0; + uint8_t timeout = FPGA_QUEUE_FLUSH_TIMEOUT_US / + FPGA_TIMEOUT_CHECK_INTERVAL; + + /* Set flush_queue_en bit to trigger queue flushing */ + fpga_reg_write_8(d->mmio_base, + offset + FPGA_5GNR_FEC_RING_FLUSH_QUEUE_EN, payload); + + /** Check if queue flush is completed. + * FPGA will update the completion flag after queue flushing is + * completed. If completion flag is not updated within 1ms it is + * considered as a failure. 
+ */ + while (!(*((volatile uint8_t *)d->flush_queue_status + q->q_idx) + & payload)) { + if (counter > timeout) { + rte_bbdev_log(ERR, "FPGA Queue Flush failed for queue %d", + queue_id); + return -1; + } + usleep(FPGA_TIMEOUT_CHECK_INTERVAL); + counter++; + } + + /* Disable queue */ + payload = 0x00; + fpga_reg_write_8(d->mmio_base, offset + FPGA_5GNR_FEC_RING_ENABLE, + payload); + + rte_bbdev_log_debug("FPGA Queue[%d] stopped", queue_id); + return 0; +} + +static inline uint16_t +get_queue_id(struct rte_bbdev_data *data, uint8_t q_idx) +{ + uint16_t queue_id; + + for (queue_id = 0; queue_id < data->num_queues; ++queue_id) { + struct fpga_queue *q = data->queues[queue_id].queue_private; + if (q != NULL && q->q_idx == q_idx) + return queue_id; + } + + return -1; +} + +/* Interrupt handler triggered by FPGA dev for handling specific interrupt */ +static void +fpga_dev_interrupt_handler(void *cb_arg) +{ + struct rte_bbdev *dev = cb_arg; + struct fpga_5gnr_fec_device *fpga_dev = dev->data->dev_private; + struct fpga_queue *q; + uint64_t ring_head; + uint64_t q_idx; + uint16_t queue_id; + uint8_t i; + + /* Scan queue assigned to this device */ + for (i = 0; i < FPGA_TOTAL_NUM_QUEUES; ++i) { + q_idx = 1ULL << i; + if (fpga_dev->q_bound_bit_map & q_idx) { + queue_id = get_queue_id(dev->data, i); + if (queue_id == (uint16_t) -1) + continue; + + /* Check if completion head was changed */ + q = dev->data->queues[queue_id].queue_private; + ring_head = *q->ring_head_addr; + if (q->shadow_completion_head != ring_head && + q->irq_enable == 1) { + q->shadow_completion_head = ring_head; + rte_bbdev_pmd_callback_process( + dev, + RTE_BBDEV_EVENT_DEQUEUE, + &queue_id); + } + } + } +} + +static int +fpga_queue_intr_enable(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + + if (!rte_intr_cap_multiple(dev->intr_handle)) + return -ENOTSUP; + + q->irq_enable = 1; + + return 0; +} + +static int 
+fpga_queue_intr_disable(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + q->irq_enable = 0; + + return 0; +} + +static int +fpga_intr_enable(struct rte_bbdev *dev) +{ + int ret; + uint8_t i; + + if (!rte_intr_cap_multiple(dev->intr_handle)) { + rte_bbdev_log(ERR, "Multiple intr vector is not supported by FPGA (%s)", + dev->data->name); + return -ENOTSUP; + } + + /* Create event file descriptors for each of 64 queue. Event fds will be + * mapped to FPGA IRQs in rte_intr_enable(). This is a 1:1 mapping where + * the IRQ number is a direct translation to the queue number. + * + * 63 (FPGA_NUM_INTR_VEC) event fds are created as rte_intr_enable() + * mapped the first IRQ to already created interrupt event file + * descriptor (intr_handle->fd). + */ + if (rte_intr_efd_enable(dev->intr_handle, FPGA_NUM_INTR_VEC)) { + rte_bbdev_log(ERR, "Failed to create fds for %u queues", + dev->data->num_queues); + return -1; + } + + /* TODO Each event file descriptor is overwritten by interrupt event + * file descriptor. That descriptor is added to epoll observed list. + * It ensures that callback function assigned to that descriptor will + * invoked when any FPGA queue issues interrupt. 
+ */ + for (i = 0; i < FPGA_NUM_INTR_VEC; ++i) + dev->intr_handle->efds[i] = dev->intr_handle->fd; + + if (!dev->intr_handle->intr_vec) { + dev->intr_handle->intr_vec = rte_zmalloc("intr_vec", + dev->data->num_queues * sizeof(int), 0); + if (!dev->intr_handle->intr_vec) { + rte_bbdev_log(ERR, "Failed to allocate %u vectors", + dev->data->num_queues); + return -ENOMEM; + } + } + + ret = rte_intr_enable(dev->intr_handle); + if (ret < 0) { + rte_bbdev_log(ERR, + "Couldn't enable interrupts for device: %s", + dev->data->name); + return ret; + } + + ret = rte_intr_callback_register(dev->intr_handle, + fpga_dev_interrupt_handler, dev); + if (ret < 0) { + rte_bbdev_log(ERR, + "Couldn't register interrupt callback for device: %s", + dev->data->name); + return ret; + } + + return 0; +} + +static const struct rte_bbdev_ops fpga_ops = { + .setup_queues = fpga_setup_queues, + .intr_enable = fpga_intr_enable, + .close = fpga_dev_close, + .info_get = fpga_dev_info_get, + .queue_setup = fpga_queue_setup, + .queue_stop = fpga_queue_stop, + .queue_start = fpga_queue_start, + .queue_release = fpga_queue_release, + .queue_intr_enable = fpga_queue_intr_enable, + .queue_intr_disable = fpga_queue_intr_disable +}; + +static inline void +fpga_dma_enqueue(struct fpga_queue *q, uint16_t num_desc, + struct rte_bbdev_stats *queue_stats) +{ +#ifdef RTE_BBDEV_OFFLOAD_COST + uint64_t start_time = 0; + queue_stats->acc_offload_cycles = 0; +#else + RTE_SET_USED(queue_stats); +#endif + + /* Update tail and shadow_tail register */ + q->tail = (q->tail + num_desc) & q->sw_ring_wrap_mask; + + rte_wmb(); + +#ifdef RTE_BBDEV_OFFLOAD_COST + /* Start time measurement for enqueue function offload. 
*/ + start_time = rte_rdtsc_precise(); +#endif + mmio_write_16(q->shadow_tail_addr, q->tail); + +#ifdef RTE_BBDEV_OFFLOAD_COST + rte_wmb(); + queue_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif +} + +/* Read flag value 0/1/ from bitmap */ +static inline bool +check_bit(uint32_t bitmap, uint32_t bitmask) +{ + return bitmap & bitmask; +} + +/* Print an error if a descriptor error has occurred. + * Return 0 on success, 1 on failure + */ +static inline int +check_desc_error(uint32_t error_code) { + switch (error_code) { + case DESC_ERR_NO_ERR: + return 0; + case DESC_ERR_K_P_OUT_OF_RANGE: + rte_bbdev_log(ERR, "Encode block size K' is out of range"); + break; + case DESC_ERR_Z_C_NOT_LEGAL: + rte_bbdev_log(ERR, "Zc is illegal"); + break; + case DESC_ERR_DESC_OFFSET_ERR: + rte_bbdev_log(ERR, + "Queue offset does not meet the expectation in the FPGA" + ); + break; + case DESC_ERR_DESC_READ_FAIL: + rte_bbdev_log(ERR, "Unsuccessful completion for descriptor read"); + break; + case DESC_ERR_DESC_READ_TIMEOUT: + rte_bbdev_log(ERR, "Descriptor read time-out"); + break; + case DESC_ERR_DESC_READ_TLP_POISONED: + rte_bbdev_log(ERR, "Descriptor read TLP poisoned"); + break; + case DESC_ERR_CB_READ_FAIL: + rte_bbdev_log(ERR, "Unsuccessful completion for code block"); + break; + case DESC_ERR_CB_READ_TIMEOUT: + rte_bbdev_log(ERR, "Code block read time-out"); + break; + case DESC_ERR_CB_READ_TLP_POISONED: + rte_bbdev_log(ERR, "Code block read TLP poisoned"); + break; + case DESC_ERR_HBSTORE_ERR: + rte_bbdev_log(ERR, "Hbstroe exceeds HARQ buffer size."); + break; + default: + rte_bbdev_log(ERR, "Descriptor error unknown error code %u", + error_code); + break; + } + return 1; +} + +/* Compute value of k0. + * Based on 3GPP 38.212 Table 5.4.2.1-2 + * Starting position of different redundancy versions, k0 + */ +static inline uint16_t +get_k0(uint16_t n_cb, uint16_t z_c, uint8_t bg, uint8_t rv_index) +{ + if (rv_index == 0) + return 0; + uint16_t n = (bg == 1 ? 
N_ZC_1 : N_ZC_2) * z_c; + if (n_cb == n) { + if (rv_index == 1) + return (bg == 1 ? K0_1_1 : K0_1_2) * z_c; + else if (rv_index == 2) + return (bg == 1 ? K0_2_1 : K0_2_2) * z_c; + else + return (bg == 1 ? K0_3_1 : K0_3_2) * z_c; + } + /* LBRM case - includes a division by N */ + if (rv_index == 1) + return (((bg == 1 ? K0_1_1 : K0_1_2) * n_cb) + / n) * z_c; + else if (rv_index == 2) + return (((bg == 1 ? K0_2_1 : K0_2_2) * n_cb) + / n) * z_c; + else + return (((bg == 1 ? K0_3_1 : K0_3_2) * n_cb) + / n) * z_c; +} + +/** + * Set DMA descriptor for encode operation (1 Code Block) + * + * @param op + * Pointer to a single encode operation. + * @param desc + * Pointer to DMA descriptor. + * @param input + * Pointer to pointer to input data which will be decoded. + * @param e + * E value (length of output in bits). + * @param ncb + * Ncb value (size of the soft buffer). + * @param out_length + * Length of output buffer + * @param in_offset + * Input offset in rte_mbuf structure. It is used for calculating the point + * where data is starting. + * @param out_offset + * Output offset in rte_mbuf structure. It is used for calculating the point + * where hard output data will be stored. + * @param cbs_in_op + * Number of CBs contained in one operation. 
+ */ +static inline int +fpga_dma_desc_te_fill(struct rte_bbdev_enc_op *op, + struct fpga_dma_enc_desc *desc, struct rte_mbuf *input, + struct rte_mbuf *output, uint16_t k_, uint16_t e, + uint32_t in_offset, uint32_t out_offset, uint16_t desc_offset, + uint8_t cbs_in_op) +{ + /* reset */ + desc->done = 0; + desc->error = 0; + desc->k_ = k_; + desc->rm_e = e; + desc->desc_idx = desc_offset; + desc->zc = op->ldpc_enc.z_c; + desc->bg_idx = op->ldpc_enc.basegraph - 1; + desc->qm_idx = op->ldpc_enc.q_m / 2; + desc->crc_en = check_bit(op->ldpc_enc.op_flags, + RTE_BBDEV_LDPC_CRC_24B_ATTACH); + desc->irq_en = 0; + desc->k0 = get_k0(op->ldpc_enc.n_cb, op->ldpc_enc.z_c, + op->ldpc_enc.basegraph, op->ldpc_enc.rv_index); + desc->ncb = op->ldpc_enc.n_cb; + desc->num_null = op->ldpc_enc.n_filler; + /* Set inbound data buffer address */ + desc->in_addr_hi = (uint32_t)( + rte_pktmbuf_mtophys_offset(input, in_offset) >> 32); + desc->in_addr_lw = (uint32_t)( + rte_pktmbuf_mtophys_offset(input, in_offset)); + + desc->out_addr_hi = (uint32_t)( + rte_pktmbuf_mtophys_offset(output, out_offset) >> 32); + desc->out_addr_lw = (uint32_t)( + rte_pktmbuf_mtophys_offset(output, out_offset)); + /* Save software context needed for dequeue */ + desc->op_addr = op; + /* Set total number of CBs in an op */ + desc->cbs_in_op = cbs_in_op; + return 0; +} + +/** + * Set DMA descriptor for decode operation (1 Code Block) + * + * @param op + * Pointer to a single encode operation. + * @param desc + * Pointer to DMA descriptor. + * @param input + * Pointer to pointer to input data which will be decoded. + * @param in_offset + * Input offset in rte_mbuf structure. It is used for calculating the point + * where data is starting. + * @param out_offset + * Output offset in rte_mbuf structure. It is used for calculating the point + * where hard output data will be stored. + * @param cbs_in_op + * Number of CBs contained in one operation. 
+ */ +static inline int +fpga_dma_desc_ld_fill(struct rte_bbdev_dec_op *op, + struct fpga_dma_dec_desc *desc, + struct rte_mbuf *input, struct rte_mbuf *output, + uint16_t harq_in_length, + uint32_t in_offset, uint32_t out_offset, + uint32_t harq_offset, + uint16_t desc_offset, + uint8_t cbs_in_op) +{ + /* reset */ + desc->done = 0; + desc->error = 0; + /* Set inbound data buffer address */ + desc->in_addr_hi = (uint32_t)( + rte_pktmbuf_mtophys_offset(input, in_offset) >> 32); + desc->in_addr_lw = (uint32_t)( + rte_pktmbuf_mtophys_offset(input, in_offset)); + desc->rm_e = op->ldpc_dec.cb_params.e; + desc->harq_input_length = harq_in_length; + desc->et_dis = !check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE); + desc->rv = op->ldpc_dec.rv_index; + desc->crc24b_ind = check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK); + desc->drop_crc24b = check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP); + desc->desc_idx = desc_offset; + desc->ncb = op->ldpc_dec.n_cb; + desc->num_null = op->ldpc_dec.n_filler; + desc->hbstroe_offset = harq_offset >> 10; + desc->zc = op->ldpc_dec.z_c; + desc->harqin_en = check_bit(op->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); + desc->bg_idx = op->ldpc_dec.basegraph - 1; + desc->max_iter = op->ldpc_dec.iter_max; + desc->qm_idx = op->ldpc_dec.q_m / 2; + desc->out_addr_hi = (uint32_t)( + rte_pktmbuf_mtophys_offset(output, out_offset) >> 32); + desc->out_addr_lw = (uint32_t)( + rte_pktmbuf_mtophys_offset(output, out_offset)); + /* Save software context needed for dequeue */ + desc->op_addr = op; + /* Set total number of CBs in an op */ + desc->cbs_in_op = cbs_in_op; + + return 0; +} + +#ifdef RTE_LIBRTE_BBDEV_DEBUG +/* Validates LDPC encoder parameters */ +static int +validate_enc_op(struct rte_bbdev_enc_op *op __rte_unused) +{ + struct rte_bbdev_op_ldpc_enc *ldpc_enc = &op->ldpc_enc; + struct rte_bbdev_op_enc_ldpc_cb_params *cb = NULL; + struct rte_bbdev_op_enc_ldpc_tb_params 
*tb = NULL; + + + if (ldpc_enc->input.length > + RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) { + rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d", + ldpc_enc->input.length, + RTE_BBDEV_LDPC_MAX_CB_SIZE); + return -1; + } + + if (op->mempool == NULL) { + rte_bbdev_log(ERR, "Invalid mempool pointer"); + return -1; + } + if (ldpc_enc->input.data == NULL) { + rte_bbdev_log(ERR, "Invalid input pointer"); + return -1; + } + if (ldpc_enc->output.data == NULL) { + rte_bbdev_log(ERR, "Invalid output pointer"); + return -1; + } + if ((ldpc_enc->basegraph > 2) || (ldpc_enc->basegraph == 0)) { + rte_bbdev_log(ERR, + "basegraph (%u) is out of range 1 <= value <= 2", + ldpc_enc->basegraph); + return -1; + } + if (ldpc_enc->code_block_mode > 1) { + rte_bbdev_log(ERR, + "code_block_mode (%u) is out of range 0:Tb 1:CB", + ldpc_enc->code_block_mode); + return -1; + } + + if (ldpc_enc->code_block_mode == 0) { + tb = &ldpc_enc->tb_params; + if (tb->c == 0) { + rte_bbdev_log(ERR, + "c (%u) is out of range 1 <= value <= %u", + tb->c, RTE_BBDEV_LDPC_MAX_CODE_BLOCKS); + return -1; + } + if (tb->cab > tb->c) { + rte_bbdev_log(ERR, + "cab (%u) is greater than c (%u)", + tb->cab, tb->c); + return -1; + } + if ((tb->ea < RTE_BBDEV_LDPC_MIN_CB_SIZE) + && tb->r < tb->cab) { + rte_bbdev_log(ERR, + "ea (%u) is less than %u or it is not even", + tb->ea, RTE_BBDEV_LDPC_MIN_CB_SIZE); + return -1; + } + if ((tb->eb < RTE_BBDEV_LDPC_MIN_CB_SIZE) + && tb->c > tb->cab) { + rte_bbdev_log(ERR, + "eb (%u) is less than %u", + tb->eb, RTE_BBDEV_LDPC_MIN_CB_SIZE); + return -1; + } + if (tb->r > (tb->c - 1)) { + rte_bbdev_log(ERR, + "r (%u) is greater than c - 1 (%u)", + tb->r, tb->c - 1); + return -1; + } + } else { + cb = &ldpc_enc->cb_params; + if (cb->e < RTE_BBDEV_LDPC_MIN_CB_SIZE) { + rte_bbdev_log(ERR, + "e (%u) is less than %u or it is not even", + cb->e, RTE_BBDEV_LDPC_MIN_CB_SIZE); + return -1; + } + } + return 0; +} +#endif + +static inline char * +mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, 
uint16_t len) +{ + if (unlikely(len > rte_pktmbuf_tailroom(m))) + return NULL; + + char *tail = (char *)m->buf_addr + m->data_off + m->data_len; + m->data_len = (uint16_t)(m->data_len + len); + m_head->pkt_len = (m_head->pkt_len + len); + return tail; +} + +#ifdef RTE_LIBRTE_BBDEV_DEBUG +/* Validates LDPC decoder parameters */ +static int +validate_dec_op(struct rte_bbdev_dec_op *op __rte_unused) +{ + struct rte_bbdev_op_ldpc_dec *ldpc_dec = &op->ldpc_dec; + struct rte_bbdev_op_dec_ldpc_cb_params *cb = NULL; + struct rte_bbdev_op_dec_ldpc_tb_params *tb = NULL; + + if (op->mempool == NULL) { + rte_bbdev_log(ERR, "Invalid mempool pointer"); + return -1; + } + if (ldpc_dec->rv_index > 3) { + rte_bbdev_log(ERR, + "rv_index (%u) is out of range 0 <= value <= 3", + ldpc_dec->rv_index); + return -1; + } + + if (ldpc_dec->iter_max == 0) { + rte_bbdev_log(ERR, + "iter_max (%u) is equal to 0", + ldpc_dec->iter_max); + return -1; + } + + if (ldpc_dec->code_block_mode > 1) { + rte_bbdev_log(ERR, + "code_block_mode (%u) is out of range 0 <= value <= 1", + ldpc_dec->code_block_mode); + return -1; + } + + if (ldpc_dec->code_block_mode == 0) { + tb = &ldpc_dec->tb_params; + if (tb->c < 1) { + rte_bbdev_log(ERR, + "c (%u) is out of range 1 <= value <= %u", + tb->c, RTE_BBDEV_LDPC_MAX_CODE_BLOCKS); + return -1; + } + if (tb->cab > tb->c) { + rte_bbdev_log(ERR, + "cab (%u) is greater than c (%u)", + tb->cab, tb->c); + return -1; + } + } else { + cb = &ldpc_dec->cb_params; + if (cb->e < RTE_BBDEV_LDPC_MIN_CB_SIZE) { + rte_bbdev_log(ERR, + "e (%u) is out of range %u <= value <= %u", + cb->e, RTE_BBDEV_LDPC_MIN_CB_SIZE, + RTE_BBDEV_LDPC_MAX_CB_SIZE); + return -1; + } + } + + return 0; +} +#endif + +static inline int +fpga_harq_write_loopback(struct fpga_5gnr_fec_device *fpga_dev, + struct rte_mbuf *harq_input, uint16_t harq_in_length, + uint32_t harq_in_offset, uint32_t harq_out_offset) +{ + uint32_t out_offset = harq_out_offset; + uint32_t in_offset = harq_in_offset; + uint32_t 
left_length = harq_in_length; + uint32_t reg_32, increment = 0; + uint64_t *input = NULL; + uint32_t last_transaction = left_length + % FPGA_5GNR_FEC_DDR_WR_DATA_LEN_IN_BYTES; + uint64_t last_word; + + if (last_transaction > 0) + left_length -= last_transaction; + + /* + * Get HARQ buffer size for each VF/PF: When 0x00, there is no + * available DDR space for the corresponding VF/PF. + */ + reg_32 = fpga_reg_read_32(fpga_dev->mmio_base, + FPGA_5GNR_FEC_HARQ_BUF_SIZE_REGS); + if (reg_32 < harq_in_length) { + left_length = reg_32; + rte_bbdev_log(ERR, "HARQ in length > HARQ buffer size\n"); + } + + input = (uint64_t *)rte_pktmbuf_mtod_offset(harq_input, + uint8_t *, in_offset); + + while (left_length > 0) { + if (fpga_reg_read_8(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_ADDR_RDY_REGS) == 1) { + fpga_reg_write_32(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_WR_ADDR_REGS, + out_offset); + fpga_reg_write_64(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_WR_DATA_REGS, + input[increment]); + left_length -= FPGA_5GNR_FEC_DDR_WR_DATA_LEN_IN_BYTES; + out_offset += FPGA_5GNR_FEC_DDR_WR_DATA_LEN_IN_BYTES; + increment++; + fpga_reg_write_8(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_WR_DONE_REGS, 1); + } + } + while (last_transaction > 0) { + if (fpga_reg_read_8(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_ADDR_RDY_REGS) == 1) { + fpga_reg_write_32(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_WR_ADDR_REGS, + out_offset); + last_word = input[increment]; + last_word &= (uint64_t)(1 << (last_transaction * 4)) + - 1; + fpga_reg_write_64(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_WR_DATA_REGS, + last_word); + fpga_reg_write_8(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_WR_DONE_REGS, 1); + last_transaction = 0; + } + } + return 1; +} + +static inline int +fpga_harq_read_loopback(struct fpga_5gnr_fec_device *fpga_dev, + struct rte_mbuf *harq_output, uint16_t harq_in_length, + uint32_t harq_in_offset, uint32_t harq_out_offset) +{ + uint32_t left_length, in_offset = harq_in_offset; + uint64_t reg; + 
uint32_t increment = 0; + uint64_t *input = NULL; + uint32_t last_transaction = harq_in_length + % FPGA_5GNR_FEC_DDR_WR_DATA_LEN_IN_BYTES; + + if (last_transaction > 0) + harq_in_length += (8 - last_transaction); + + reg = fpga_reg_read_32(fpga_dev->mmio_base, + FPGA_5GNR_FEC_HARQ_BUF_SIZE_REGS); + if (reg < harq_in_length) { + harq_in_length = reg; + rte_bbdev_log(ERR, "HARQ in length > HARQ buffer size\n"); + } + + if (!mbuf_append(harq_output, harq_output, harq_in_length)) { + rte_bbdev_log(ERR, "HARQ output buffer warning %d %d\n", + harq_output->buf_len - + rte_pktmbuf_headroom(harq_output), + harq_in_length); + harq_in_length = harq_output->buf_len - + rte_pktmbuf_headroom(harq_output); + if (!mbuf_append(harq_output, harq_output, harq_in_length)) { + rte_bbdev_log(ERR, "HARQ output buffer issue %d %d\n", + harq_output->buf_len, harq_in_length); + return -1; + } + } + left_length = harq_in_length; + + input = (uint64_t *)rte_pktmbuf_mtod_offset(harq_output, + uint8_t *, harq_out_offset); + + while (left_length > 0) { + fpga_reg_write_32(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_RD_ADDR_REGS, in_offset); + fpga_reg_write_8(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_RD_DONE_REGS, 1); + reg = fpga_reg_read_8(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_RD_RDY_REGS); + while (reg != 1) { + reg = fpga_reg_read_8(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_RD_RDY_REGS); + if (reg == FPGA_DDR_OVERFLOW) { + rte_bbdev_log(ERR, + "Read address is overflow!\n"); + return -1; + } + } + input[increment] = fpga_reg_read_64(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_RD_DATA_REGS); + left_length -= FPGA_5GNR_FEC_DDR_RD_DATA_LEN_IN_BYTES; + in_offset += FPGA_5GNR_FEC_DDR_WR_DATA_LEN_IN_BYTES; + increment++; + fpga_reg_write_8(fpga_dev->mmio_base, + FPGA_5GNR_FEC_DDR4_RD_DONE_REGS, 0); + } + return 1; +} + +static inline int +enqueue_ldpc_enc_one_op_cb(struct fpga_queue *q, struct rte_bbdev_enc_op *op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + int ret; + uint8_t c, 
crc24_bits = 0; + struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; + uint16_t in_offset = enc->input.offset; + uint16_t out_offset = enc->output.offset; + struct rte_mbuf *m_in = enc->input.data; + struct rte_mbuf *m_out = enc->output.data; + struct rte_mbuf *m_out_head = enc->output.data; + uint32_t in_length, out_length, e; + uint16_t total_left = enc->input.length; + uint16_t ring_offset; + uint16_t K, k_; + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + /* Validate op structure */ + /* FIXME */ + if (validate_enc_op(op) == -1) { + rte_bbdev_log(ERR, "LDPC encoder validation failed"); + return -EINVAL; + } +#endif + + /* Clear op status */ + op->status = 0; + + if (m_in == NULL || m_out == NULL) { + rte_bbdev_log(ERR, "Invalid mbuf pointer"); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return -EINVAL; + } + + if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) + crc24_bits = 24; + + if (enc->code_block_mode == 0) { + /* For Transport Block mode */ + /* FIXME */ + c = enc->tb_params.c; + e = enc->tb_params.ea; + } else { /* For Code Block mode */ + c = 1; + e = enc->cb_params.e; + } + + /* Update total_left */ + K = (enc->basegraph == 1 ? 
22 : 10) * enc->z_c; + k_ = K - enc->n_filler; + in_length = (k_ - crc24_bits) >> 3; + out_length = (e + 7) >> 3; + + total_left = rte_pktmbuf_data_len(m_in) - in_offset; + + /* Update offsets */ + if (total_left != in_length) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included CBs sizes %d", + total_left); + } + + mbuf_append(m_out_head, m_out, out_length); + + /* Offset into the ring */ + ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); + /* Setup DMA Descriptor */ + desc = q->ring_addr + ring_offset; + + ret = fpga_dma_desc_te_fill(op, &desc->enc_req, m_in, m_out, + k_, e, in_offset, out_offset, ring_offset, c); + if (unlikely(ret < 0)) + return ret; + + /* Update lengths */ + total_left -= in_length; + op->ldpc_enc.output.length += out_length; + + if (total_left > 0) { + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u", + total_left, in_length); + return -1; + } + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + print_dma_enc_desc_debug_info(desc); +#endif + return 1; +} + +static inline int +enqueue_ldpc_dec_one_op_cb(struct fpga_queue *q, struct rte_bbdev_dec_op *op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + int ret; + uint16_t ring_offset; + uint8_t c; + uint16_t e, in_length, out_length, k0, l, seg_total_left, sys_cols; + uint16_t K, parity_offset, harq_in_length = 0, harq_out_length = 0; + uint16_t crc24_overlap = 0; + struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; + struct rte_mbuf *m_in = dec->input.data; + struct rte_mbuf *m_out = dec->hard_output.data; + struct rte_mbuf *m_out_head = dec->hard_output.data; + uint16_t in_offset = dec->input.offset; + uint16_t out_offset = dec->hard_output.offset; + uint32_t harq_offset = 0; + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + /* Validate op structure */ + if (validate_dec_op(op) == -1) { + rte_bbdev_log(ERR, "LDPC decoder validation failed"); + return -EINVAL; + } +#endif + + /* Clear op status 
*/ + op->status = 0; + + /* Setup DMA Descriptor */ + ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); + desc = q->ring_addr + ring_offset; + + if (check_bit(dec->op_flags, + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { + struct rte_mbuf *harq_in = dec->harq_combined_input.data; + struct rte_mbuf *harq_out = dec->harq_combined_output.data; + harq_in_length = dec->harq_combined_input.length; + uint32_t harq_in_offset = dec->harq_combined_input.offset; + uint32_t harq_out_offset = dec->harq_combined_output.offset; + + if (check_bit(dec->op_flags, + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_OUT_ENABLE + )) { + ret = fpga_harq_write_loopback(q->d, harq_in, + harq_in_length, harq_in_offset, + harq_out_offset); + } else if (check_bit(dec->op_flags, + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_IN_ENABLE + )) { + ret = fpga_harq_read_loopback(q->d, harq_out, + harq_in_length, harq_in_offset, + harq_out_offset); + dec->harq_combined_output.length = harq_in_length; + } else { + rte_bbdev_log(ERR, "OP flag Err!"); + ret = -1; + } + /* Set descriptor for dequeue */ + desc->dec_req.done = 1; + desc->dec_req.error = 0; + desc->dec_req.op_addr = op; + desc->dec_req.cbs_in_op = 1; + /* Mark this dummy descriptor to be dropped by HW */ + desc->dec_req.desc_idx = (ring_offset + 1) + & q->sw_ring_wrap_mask; + return ret; /* Error or number of CB */ + } + + if (m_in == NULL || m_out == NULL) { + rte_bbdev_log(ERR, "Invalid mbuf pointer"); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return -1; + } + + c = 1; + e = dec->cb_params.e; + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP)) + crc24_overlap = 24; + + sys_cols = (dec->basegraph == 1) ? 
22 : 10; + K = sys_cols * dec->z_c; + parity_offset = K - 2 * dec->z_c; + + out_length = ((K - crc24_overlap - dec->n_filler) >> 3); + in_length = e; + seg_total_left = dec->input.length; + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { + harq_in_length = RTE_MIN(dec->harq_combined_input.length, + (uint32_t)dec->n_cb); + } + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) { + k0 = get_k0(dec->n_cb, dec->z_c, + dec->basegraph, dec->rv_index); + if (k0 > parity_offset) + l = k0 + e; + else + l = k0 + e + dec->n_filler; + harq_out_length = RTE_MIN(RTE_MAX(harq_in_length, l), + dec->n_cb - dec->n_filler); + dec->harq_combined_output.length = harq_out_length; + } + + mbuf_append(m_out_head, m_out, out_length); + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) + harq_offset = dec->harq_combined_input.offset; + else if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) + harq_offset = dec->harq_combined_output.offset; + + if ((harq_offset & 0x3FF) > 0) { + rte_bbdev_log(ERR, "Invalid HARQ offset %d", harq_offset); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return -1; + } + + ret = fpga_dma_desc_ld_fill(op, &desc->dec_req, m_in, m_out, + harq_in_length, in_offset, out_offset, harq_offset, + ring_offset, c); + if (unlikely(ret < 0)) + return ret; + /* Update lengths */ + seg_total_left -= in_length; + op->ldpc_dec.hard_output.length += out_length; + if (seg_total_left > 0) { + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u", + seg_total_left, in_length); + return -1; + } + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + print_dma_dec_desc_debug_info(desc); +#endif + + return 1; +} + +static uint16_t +fpga_enqueue_ldpc_enc(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t num) +{ + uint16_t i, total_enqueued_cbs = 0; + int32_t avail; + int enqueued_cbs; + struct fpga_queue *q = q_data->queue_private; + union fpga_dma_desc *desc; + + 
/* Check if queue is not full */ + if (unlikely(((q->tail + 1) & q->sw_ring_wrap_mask) == + q->head_free_desc)) + return 0; + + /* Calculates available space */ + avail = (q->head_free_desc > q->tail) ? + q->head_free_desc - q->tail - 1 : + q->ring_ctrl_reg.ring_size + q->head_free_desc - q->tail - 1; + + for (i = 0; i < num; ++i) { + + /* Check if there is available space for further + * processing + */ + if (unlikely(avail - 1 < 0)) + break; + avail -= 1; + enqueued_cbs = enqueue_ldpc_enc_one_op_cb(q, ops[i], + total_enqueued_cbs); + + if (enqueued_cbs < 0) + break; + + total_enqueued_cbs += enqueued_cbs; + + rte_bbdev_log_debug("enqueuing enc ops [%d/%d] | head %d | tail %d", + total_enqueued_cbs, num, + q->head_free_desc, q->tail); + } + + /* Set interrupt bit for last CB in enqueued ops. FPGA issues interrupt + * only when all previous CBs were already processed. + */ + desc = q->ring_addr + ((q->tail + total_enqueued_cbs - 1) + & q->sw_ring_wrap_mask); + desc->enc_req.irq_en = q->irq_enable; + + fpga_dma_enqueue(q, total_enqueued_cbs, &q_data->queue_stats); + + /* Update stats */ + q_data->queue_stats.enqueued_count += i; + q_data->queue_stats.enqueue_err_count += num - i; + + return i; +} + +static uint16_t +fpga_enqueue_ldpc_dec(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t num) +{ + uint16_t i, total_enqueued_cbs = 0; + int32_t avail; + int enqueued_cbs; + struct fpga_queue *q = q_data->queue_private; + union fpga_dma_desc *desc; + + /* Check if queue is not full */ + if (unlikely(((q->tail + 1) & q->sw_ring_wrap_mask) == + q->head_free_desc)) + return 0; + + /* Calculates available space */ + avail = (q->head_free_desc > q->tail) ? 
+ q->head_free_desc - q->tail - 1 : + q->ring_ctrl_reg.ring_size + q->head_free_desc - q->tail - 1; + + for (i = 0; i < num; ++i) { + + /* Check if there is available space for further + * processing + */ + if (unlikely(avail - 1 < 0)) + break; + avail -= 1; + enqueued_cbs = enqueue_ldpc_dec_one_op_cb(q, ops[i], + total_enqueued_cbs); + + if (enqueued_cbs < 0) + break; + + total_enqueued_cbs += enqueued_cbs; + + rte_bbdev_log_debug("enqueuing dec ops [%d/%d] | head %d | tail %d", + total_enqueued_cbs, num, + q->head_free_desc, q->tail); + } + + /* Update stats */ + q_data->queue_stats.enqueued_count += i; + q_data->queue_stats.enqueue_err_count += num - i; + + /* Set interrupt bit for last CB in enqueued ops. FPGA issues interrupt + * only when all previous CBs were already processed. + */ + desc = q->ring_addr + ((q->tail + total_enqueued_cbs - 1) + & q->sw_ring_wrap_mask); + desc->enc_req.irq_en = q->irq_enable; + fpga_dma_enqueue(q, total_enqueued_cbs, &q_data->queue_stats); + return i; +} + + +static inline int +dequeue_ldpc_enc_one_op_cb(struct fpga_queue *q, + struct rte_bbdev_enc_op **op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + int desc_error; + /* Set current desc */ + desc = q->ring_addr + ((q->head_free_desc + desc_offset) + & q->sw_ring_wrap_mask); + + /*check if done */ + if (desc->enc_req.done == 0) + return -1; + + /* make sure the response is read atomically */ + rte_smp_rmb(); + + rte_bbdev_log_debug("DMA response desc %p", desc); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + print_dma_enc_desc_debug_info(desc); +#endif + + *op = desc->enc_req.op_addr; + /* Check the descriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + (*op)->status = desc_error << RTE_BBDEV_DATA_ERROR; + + return 1; +} + + +static inline int +dequeue_ldpc_dec_one_op_cb(struct fpga_queue *q, struct rte_bbdev_dec_op **op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + int desc_error; + /* Set descriptor */ + desc = 
q->ring_addr + ((q->head_free_desc + desc_offset) + & q->sw_ring_wrap_mask); + + /* Verify done bit is set */ + if (desc->dec_req.done == 0) + return -1; + + /* make sure the response is read atomically */ + rte_smp_rmb(); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + print_dma_dec_desc_debug_info(desc); +#endif + + *op = desc->dec_req.op_addr; + + if (check_bit((*op)->ldpc_dec.op_flags, + RTE_BBDEV_LDPC_INTERNAL_HARQ_MEMORY_LOOPBACK)) { + (*op)->status = 0; + return 1; + } + + /* FPGA reports iterations based on round-up minus 1 */ + (*op)->ldpc_dec.iter_count = desc->dec_req.iter + 1; + /* CRC Check criteria */ + if (desc->dec_req.crc24b_ind && !(desc->dec_req.crcb_pass)) + (*op)->status = 1 << RTE_BBDEV_CRC_ERROR; + /* et_pass = 0 when decoder fails */ + (*op)->status |= !(desc->dec_req.et_pass) << RTE_BBDEV_SYNDROME_ERROR; + /* Check the descriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->dec_req.error); + (*op)->status |= desc_error << RTE_BBDEV_DATA_ERROR; + return 1; +} + +static uint16_t +fpga_dequeue_ldpc_enc(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t num) +{ + struct fpga_queue *q = q_data->queue_private; + uint32_t avail = (q->tail - q->head_free_desc) & q->sw_ring_wrap_mask; + uint16_t i; + uint16_t dequeued_cbs = 0; + int ret; + + for (i = 0; (i < num) && (dequeued_cbs < avail); ++i) { + ret = dequeue_ldpc_enc_one_op_cb(q, &ops[i], dequeued_cbs); + + if (ret < 0) + break; + + dequeued_cbs += ret; + + rte_bbdev_log_debug("dequeuing enc ops [%d/%d] | head %d | tail %d", + dequeued_cbs, num, q->head_free_desc, q->tail); + } + + /* Update head */ + q->head_free_desc = (q->head_free_desc + dequeued_cbs) & + q->sw_ring_wrap_mask; + + /* Update stats */ + q_data->queue_stats.dequeued_count += i; + + return i; +} + +static uint16_t +fpga_dequeue_ldpc_dec(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t num) +{ + struct fpga_queue *q = q_data->queue_private; + uint32_t avail = 
(q->tail - q->head_free_desc) & q->sw_ring_wrap_mask; + uint16_t i; + uint16_t dequeued_cbs = 0; + int ret; + + for (i = 0; (i < num) && (dequeued_cbs < avail); ++i) { + ret = dequeue_ldpc_dec_one_op_cb(q, &ops[i], dequeued_cbs); + + if (ret < 0) + break; + + dequeued_cbs += ret; + + rte_bbdev_log_debug("dequeuing dec ops [%d/%d] | head %d | tail %d", + dequeued_cbs, num, q->head_free_desc, q->tail); + } + + /* Update head */ + q->head_free_desc = (q->head_free_desc + dequeued_cbs) & + q->sw_ring_wrap_mask; + + /* Update stats */ + q_data->queue_stats.dequeued_count += i; + + return i; +} + + +/* Initialization Function */ +static void +fpga_5gnr_fec_init(struct rte_bbdev *dev, struct rte_pci_driver *drv) +{ + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + + dev->dev_ops = &fpga_ops; + dev->enqueue_ldpc_enc_ops = fpga_enqueue_ldpc_enc; + dev->enqueue_ldpc_dec_ops = fpga_enqueue_ldpc_dec; + dev->dequeue_ldpc_enc_ops = fpga_dequeue_ldpc_enc; + dev->dequeue_ldpc_dec_ops = fpga_dequeue_ldpc_dec; + + ((struct fpga_5gnr_fec_device *) dev->data->dev_private)->pf_device = + !strcmp(drv->driver.name, + RTE_STR(FPGA_5GNR_FEC_PF_DRIVER_NAME)); + ((struct fpga_5gnr_fec_device *) dev->data->dev_private)->mmio_base = + pci_dev->mem_resource[0].addr; + + rte_bbdev_log_debug( + "Init device %s [%s] @ virtaddr %p phyaddr %#"PRIx64, + dev->device->driver->name, dev->data->name, + (void *)pci_dev->mem_resource[0].addr, + pci_dev->mem_resource[0].phys_addr); +} + +static int +fpga_5gnr_fec_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) +{ + struct rte_bbdev *bbdev = NULL; + char dev_name[RTE_BBDEV_NAME_MAX_LEN]; + + if (pci_dev == NULL) { + rte_bbdev_log(ERR, "NULL PCI device"); + return -EINVAL; + } + + rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name)); + + /* Allocate memory to be used privately by drivers */ + bbdev = rte_bbdev_allocate(pci_dev->device.name); + if (bbdev == NULL) + return -ENODEV; + + /* allocate device 
private memory */ + bbdev->data->dev_private = rte_zmalloc_socket(dev_name, + sizeof(struct fpga_5gnr_fec_device), + RTE_CACHE_LINE_SIZE, + pci_dev->device.numa_node); + + if (bbdev->data->dev_private == NULL) { + rte_bbdev_log(CRIT, + "Allocate of %zu bytes for device \"%s\" failed", + sizeof(struct fpga_5gnr_fec_device), dev_name); + rte_bbdev_release(bbdev); + return -ENOMEM; + } + + /* Fill HW specific part of device structure */ + bbdev->device = &pci_dev->device; + bbdev->intr_handle = &pci_dev->intr_handle; + bbdev->data->socket_id = pci_dev->device.numa_node; + + /* Invoke FEC FPGA device initialization function */ + fpga_5gnr_fec_init(bbdev, pci_drv); + + rte_bbdev_log_debug("bbdev id = %u [%s]", + bbdev->data->dev_id, dev_name); + + struct fpga_5gnr_fec_device *d = bbdev->data->dev_private; + uint32_t version_id = fpga_reg_read_32(d->mmio_base, + FPGA_5GNR_FEC_VERSION_ID); + rte_bbdev_log(INFO, "FEC FPGA RTL v%u.%u", + ((uint16_t)(version_id >> 16)), ((uint16_t)version_id)); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + if (!strcmp(bbdev->device->driver->name, + RTE_STR(FPGA_5GNR_FEC_PF_DRIVER_NAME))) + print_static_reg_debug_info(d->mmio_base); +#endif + return 0; +} + +static int +fpga_5gnr_fec_remove(struct rte_pci_device *pci_dev) +{ + struct rte_bbdev *bbdev; + int ret; + uint8_t dev_id; + + if (pci_dev == NULL) + return -EINVAL; + + /* Find device */ + bbdev = rte_bbdev_get_named_dev(pci_dev->device.name); + if (bbdev == NULL) { + rte_bbdev_log(CRIT, + "Couldn't find HW dev \"%s\" to uninitialise it", + pci_dev->device.name); + return -ENODEV; + } + dev_id = bbdev->data->dev_id; + + /* free device private memory before close */ + rte_free(bbdev->data->dev_private); + + /* Close device */ + ret = rte_bbdev_close(dev_id); + if (ret < 0) + rte_bbdev_log(ERR, + "Device %i failed to close during uninit: %i", + dev_id, ret); + + /* release bbdev from library */ + ret = rte_bbdev_release(bbdev); + if (ret) + rte_bbdev_log(ERR, "Device %i failed to uninit: %i", 
dev_id, + ret); + + rte_bbdev_log_debug("Destroyed bbdev = %u", dev_id); + + return 0; +} + +static inline void +set_default_fpga_conf(struct fpga_5gnr_fec_conf *def_conf) +{ + /* clear default configuration before initialization */ + memset(def_conf, 0, sizeof(struct fpga_5gnr_fec_conf)); + /* Set pf mode to true */ + def_conf->pf_mode_en = true; + + /* Set ratio between UL and DL to 1:1 (unit of weight is 3 CBs) */ + def_conf->ul_bandwidth = 3; + def_conf->dl_bandwidth = 3; + + /* Set Load Balance Factor to 64 */ + def_conf->dl_load_balance = 64; + def_conf->ul_load_balance = 64; +} + +/* Initial configuration of FPGA 5GNR FEC device */ +int +fpga_5gnr_fec_configure(const char *dev_name, + const struct fpga_5gnr_fec_conf *conf) +{ + uint32_t payload_32, address; + uint16_t payload_16; + uint8_t payload_8; + uint16_t q_id, vf_id, total_q_id, total_ul_q_id, total_dl_q_id; + struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name); + struct fpga_5gnr_fec_conf def_conf; + + if (bbdev == NULL) { + rte_bbdev_log(ERR, + "Invalid dev_name (%s), or device is not yet initialised", + dev_name); + return -ENODEV; + } + + struct fpga_5gnr_fec_device *d = bbdev->data->dev_private; + + if (conf == NULL) { + rte_bbdev_log(ERR, + "FPGA Configuration was not provided. Default configuration will be loaded."); + set_default_fpga_conf(&def_conf); + conf = &def_conf; + } + + /* + * Configure UL:DL ratio. + * [7:0]: UL weight + * [15:8]: DL weight + */ + payload_16 = (conf->dl_bandwidth << 8) | conf->ul_bandwidth; + address = FPGA_5GNR_FEC_CONFIGURATION; + fpga_reg_write_16(d->mmio_base, address, payload_16); + + /* Clear all queues registers */ + payload_32 = FPGA_INVALID_HW_QUEUE_ID; + for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { + address = (q_id << 2) + FPGA_5GNR_FEC_QUEUE_MAP; + fpga_reg_write_32(d->mmio_base, address, payload_32); + } + + /* + * If PF mode is enabled allocate all queues for PF only. 
+ * + * For VF mode each VF can have a different number of UL and DL queues. + * Total number of queues to configure cannot exceed FPGA + * capabilities - 64 queues - 32 queues for UL and 32 queues for DL. + * Queues mapping is done according to configuration: + * + * UL queues: + * | Q_ID | VF_ID | + * | 0 | 0 | + * | ... | 0 | + * | conf->vf_ul_queues_number[0] - 1 | 0 | + * | conf->vf_ul_queues_number[0] | 1 | + * | ... | 1 | + * | conf->vf_ul_queues_number[1] - 1 | 1 | + * | ... | ... | + * | conf->vf_ul_queues_number[7] - 1 | 7 | + * + * DL queues: + * | Q_ID | VF_ID | + * | 32 | 0 | + * | ... | 0 | + * | conf->vf_dl_queues_number[0] - 1 | 0 | + * | conf->vf_dl_queues_number[0] | 1 | + * | ... | 1 | + * | conf->vf_dl_queues_number[1] - 1 | 1 | + * | ... | ... | + * | conf->vf_dl_queues_number[7] - 1 | 7 | + * + * Example of configuration: + * conf->vf_ul_queues_number[0] = 4; -> 4 UL queues for VF0 + * conf->vf_dl_queues_number[0] = 4; -> 4 DL queues for VF0 + * conf->vf_ul_queues_number[1] = 2; -> 2 UL queues for VF1 + * conf->vf_dl_queues_number[1] = 2; -> 2 DL queues for VF1 + * + * UL: + * | Q_ID | VF_ID | + * | 0 | 0 | + * | 1 | 0 | + * | 2 | 0 | + * | 3 | 0 | + * | 4 | 1 | + * | 5 | 1 | + * + * DL: + * | Q_ID | VF_ID | + * | 32 | 0 | + * | 33 | 0 | + * | 34 | 0 | + * | 35 | 0 | + * | 36 | 1 | + * | 37 | 1 | + */ + if (conf->pf_mode_en) { + payload_32 = 0x1; + for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { + address = (q_id << 2) + FPGA_5GNR_FEC_QUEUE_MAP; + fpga_reg_write_32(d->mmio_base, address, payload_32); + } + } else { + /* Calculate total number of UL and DL queues to configure */ + total_ul_q_id = total_dl_q_id = 0; + for (vf_id = 0; vf_id < FPGA_5GNR_FEC_NUM_VFS; ++vf_id) { + total_ul_q_id += conf->vf_ul_queues_number[vf_id]; + total_dl_q_id += conf->vf_dl_queues_number[vf_id]; + } + total_q_id = total_dl_q_id + total_ul_q_id; + /* + * Check if total number of queues to configure does not exceed + * FPGA capabilities (64 queues - 32 UL and 
32 DL queues) + */ + if ((total_ul_q_id > FPGA_NUM_UL_QUEUES) || + (total_dl_q_id > FPGA_NUM_DL_QUEUES) || + (total_q_id > FPGA_TOTAL_NUM_QUEUES)) { + rte_bbdev_log(ERR, + "FPGA Configuration failed. Too many queues to configure: UL_Q %u, DL_Q %u, FPGA_Q %u", + total_ul_q_id, total_dl_q_id, + FPGA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + total_ul_q_id = 0; + for (vf_id = 0; vf_id < FPGA_5GNR_FEC_NUM_VFS; ++vf_id) { + for (q_id = 0; q_id < conf->vf_ul_queues_number[vf_id]; + ++q_id, ++total_ul_q_id) { + address = (total_ul_q_id << 2) + + FPGA_5GNR_FEC_QUEUE_MAP; + payload_32 = ((0x80 + vf_id) << 16) | 0x1; + fpga_reg_write_32(d->mmio_base, address, + payload_32); + } + } + total_dl_q_id = 0; + for (vf_id = 0; vf_id < FPGA_5GNR_FEC_NUM_VFS; ++vf_id) { + for (q_id = 0; q_id < conf->vf_dl_queues_number[vf_id]; + ++q_id, ++total_dl_q_id) { + address = ((total_dl_q_id + FPGA_NUM_UL_QUEUES) + << 2) + FPGA_5GNR_FEC_QUEUE_MAP; + payload_32 = ((0x80 + vf_id) << 16) | 0x1; + fpga_reg_write_32(d->mmio_base, address, + payload_32); + } + } + } + + /* Setting Load Balance Factor */ + payload_16 = (conf->dl_load_balance << 8) | (conf->ul_load_balance); + address = FPGA_5GNR_FEC_LOAD_BALANCE_FACTOR; + fpga_reg_write_16(d->mmio_base, address, payload_16); + + /* Setting length of ring descriptor entry */ + payload_16 = FPGA_RING_DESC_ENTRY_LENGTH; + address = FPGA_5GNR_FEC_RING_DESC_LEN; + fpga_reg_write_16(d->mmio_base, address, payload_16); + + /* Setting FLR timeout value */ + payload_16 = conf->flr_time_out; + address = FPGA_5GNR_FEC_FLR_TIME_OUT; + fpga_reg_write_16(d->mmio_base, address, payload_16); + + /* Queue PF/VF mapping table is ready */ + payload_8 = 0x1; + address = FPGA_5GNR_FEC_QUEUE_PF_VF_MAP_DONE; + fpga_reg_write_8(d->mmio_base, address, payload_8); + + rte_bbdev_log_debug("PF FPGA 5GNR FEC configuration complete for %s", + dev_name); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + print_static_reg_debug_info(d->mmio_base); +#endif + return 0; +} + +/* FPGA 5GNR FEC PCI PF 
address map */ +static struct rte_pci_id pci_id_fpga_5gnr_fec_pf_map[] = { + { + RTE_PCI_DEVICE(FPGA_5GNR_FEC_VENDOR_ID, + FPGA_5GNR_FEC_PF_DEVICE_ID) + }, + {.device_id = 0}, +}; + +static struct rte_pci_driver fpga_5gnr_fec_pci_pf_driver = { + .probe = fpga_5gnr_fec_probe, + .remove = fpga_5gnr_fec_remove, + .id_table = pci_id_fpga_5gnr_fec_pf_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING +}; + +/* FPGA 5GNR FEC PCI VF address map */ +static struct rte_pci_id pci_id_fpga_5gnr_fec_vf_map[] = { + { + RTE_PCI_DEVICE(FPGA_5GNR_FEC_VENDOR_ID, + FPGA_5GNR_FEC_VF_DEVICE_ID) + }, + {.device_id = 0}, +}; + +static struct rte_pci_driver fpga_5gnr_fec_pci_vf_driver = { + .probe = fpga_5gnr_fec_probe, + .remove = fpga_5gnr_fec_remove, + .id_table = pci_id_fpga_5gnr_fec_vf_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING +}; + + +RTE_PMD_REGISTER_PCI(FPGA_5GNR_FEC_PF_DRIVER_NAME, fpga_5gnr_fec_pci_pf_driver); +RTE_PMD_REGISTER_PCI_TABLE(FPGA_5GNR_FEC_PF_DRIVER_NAME, + pci_id_fpga_5gnr_fec_pf_map); +RTE_PMD_REGISTER_PCI(FPGA_5GNR_FEC_VF_DRIVER_NAME, fpga_5gnr_fec_pci_vf_driver); +RTE_PMD_REGISTER_PCI_TABLE(FPGA_5GNR_FEC_VF_DRIVER_NAME, + pci_id_fpga_5gnr_fec_vf_map); + +RTE_INIT(fpga_5gnr_fec_init_log) +{ + fpga_5gnr_fec_logtype = rte_log_register("pmd.bb.fpga_5gnr_fec"); + if (fpga_5gnr_fec_logtype >= 0) +#ifdef RTE_LIBRTE_BBDEV_DEBUG + rte_log_set_level(fpga_5gnr_fec_logtype, RTE_LOG_DEBUG); +#else + rte_log_set_level(fpga_5gnr_fec_logtype, RTE_LOG_NOTICE); +#endif +} diff --git a/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_pmd_bbdev_fpga_5gnr_fec_version.map b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_pmd_bbdev_fpga_5gnr_fec_version.map new file mode 100644 index 000000000..b0fb9717f --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_pmd_bbdev_fpga_5gnr_fec_version.map @@ -0,0 +1,10 @@ +DPDK_20.0 { + local: *; +}; + +EXPERIMENTAL { + global: + + fpga_5gnr_fec_configure; + +}; diff --git 
a/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_pmd_fpga_5gnr_fec.h b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_pmd_fpga_5gnr_fec.h new file mode 100644 index 000000000..70a4acf0b --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_5gnr_fec/rte_pmd_fpga_5gnr_fec.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2020 Intel Corporation + */ + +#ifndef _RTE_PMD_FPGA_5GNR_FEC_H_ +#define _RTE_PMD_FPGA_5GNR_FEC_H_ + +#include <stdint.h> +#include <stdbool.h> + +/** + * @file rte_pmd_fpga_5gnr_fec.h + * + * Interface for Intel(R) FPGA 5GNR FEC device configuration at the host level, + * directly accessible by the application. + * Configuration related to 5GNR functionality is done through + * librte_bbdev library. + * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** Number of Virtual Functions FPGA 5GNR FEC supports */ +#define FPGA_5GNR_FEC_NUM_VFS 8 + +/** + * Structure to pass FPGA 5GNR FEC configuration. + */ +struct fpga_5gnr_fec_conf { + /** 1 if PF is used for dataplane, 0 for VFs */ + bool pf_mode_en; + /** Number of UL queues per VF */ + uint8_t vf_ul_queues_number[FPGA_5GNR_FEC_NUM_VFS]; + /** Number of DL queues per VF */ + uint8_t vf_dl_queues_number[FPGA_5GNR_FEC_NUM_VFS]; + /** UL bandwidth. Needed for schedule algorithm */ + uint8_t ul_bandwidth; + /** DL bandwidth. Needed for schedule algorithm */ + uint8_t dl_bandwidth; + /** UL Load Balance */ + uint8_t ul_load_balance; + /** DL Load Balance */ + uint8_t dl_load_balance; + /** FLR timeout value */ + uint16_t flr_time_out; +}; + +/** + * Configure Intel(R) FPGA 5GNR FEC device + * + * @param dev_name + * The name of the device. This is the short form of PCI BDF, e.g. 00:01.0. + * It can also be retrieved for a bbdev device from the dev_name field in the + * rte_bbdev_info structure returned by rte_bbdev_info_get(). + * @param conf + * Configuration to apply to FPGA 5GNR FEC. 
+ * + * @return + * Zero on success, negative value on failure. + */ +__rte_experimental +int +fpga_5gnr_fec_configure(const char *dev_name, + const struct fpga_5gnr_fec_conf *conf); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PMD_FPGA_5GNR_FEC_H_ */ diff --git a/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/Makefile b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/Makefile new file mode 100644 index 000000000..30caafe3d --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/Makefile @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2019 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_pmd_bbdev_fpga_lte_fec.a + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_bbdev +LDLIBS += -lrte_pci -lrte_bus_pci + +# versioning export map +EXPORT_MAP := rte_pmd_bbdev_fpga_lte_fec_version.map + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC) += fpga_lte_fec.c + +# export include files +SYMLINK-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_FPGA_LTE_FEC)-include += fpga_lte_fec.h + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.c b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.c new file mode 100644 index 000000000..abc5a1bf6 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.c @@ -0,0 +1,2675 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#include <unistd.h> + +#include <rte_common.h> +#include <rte_log.h> +#include <rte_dev.h> +#include <rte_malloc.h> +#include <rte_mempool.h> +#include <rte_errno.h> +#include <rte_pci.h> +#include <rte_bus_pci.h> +#include <rte_byteorder.h> +#ifdef RTE_BBDEV_OFFLOAD_COST +#include <rte_cycles.h> +#endif + +#include <rte_bbdev.h> +#include <rte_bbdev_pmd.h> + +#include "fpga_lte_fec.h" + +/* FPGA LTE FEC PMD logging ID */ +static int 
fpga_lte_fec_logtype; + +/* Helper macro for logging */ +#define rte_bbdev_log(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, fpga_lte_fec_logtype, fmt "\n", \ + ##__VA_ARGS__) + +#ifdef RTE_LIBRTE_BBDEV_DEBUG +#define rte_bbdev_log_debug(fmt, ...) \ + rte_bbdev_log(DEBUG, "fpga_lte_fec: " fmt, \ + ##__VA_ARGS__) +#else +#define rte_bbdev_log_debug(fmt, ...) +#endif + +/* FPGA LTE FEC driver names */ +#define FPGA_LTE_FEC_PF_DRIVER_NAME intel_fpga_lte_fec_pf +#define FPGA_LTE_FEC_VF_DRIVER_NAME intel_fpga_lte_fec_vf + +/* FPGA LTE FEC PCI vendor & device IDs */ +#define FPGA_LTE_FEC_VENDOR_ID (0x1172) +#define FPGA_LTE_FEC_PF_DEVICE_ID (0x5052) +#define FPGA_LTE_FEC_VF_DEVICE_ID (0x5050) + +/* Align DMA descriptors to 256 bytes - cache-aligned */ +#define FPGA_RING_DESC_ENTRY_LENGTH (8) +/* Ring size is in 256 bits (32 bytes) units */ +#define FPGA_RING_DESC_LEN_UNIT_BYTES (32) +/* Maximum size of queue */ +#define FPGA_RING_MAX_SIZE (1024) +#define FPGA_FLR_TIMEOUT_UNIT (16.384) + +#define FPGA_NUM_UL_QUEUES (32) +#define FPGA_NUM_DL_QUEUES (32) +#define FPGA_TOTAL_NUM_QUEUES (FPGA_NUM_UL_QUEUES + FPGA_NUM_DL_QUEUES) +#define FPGA_NUM_INTR_VEC (FPGA_TOTAL_NUM_QUEUES - RTE_INTR_VEC_RXTX_OFFSET) + +#define FPGA_INVALID_HW_QUEUE_ID (0xFFFFFFFF) + +#define FPGA_QUEUE_FLUSH_TIMEOUT_US (1000) +#define FPGA_TIMEOUT_CHECK_INTERVAL (5) + +/* FPGA LTE FEC Register mapping on BAR0 */ +enum { + FPGA_LTE_FEC_VERSION_ID = 0x00000000, /* len: 4B */ + FPGA_LTE_FEC_CONFIGURATION = 0x00000004, /* len: 2B */ + FPGA_LTE_FEC_QUEUE_PF_VF_MAP_DONE = 0x00000008, /* len: 1B */ + FPGA_LTE_FEC_LOAD_BALANCE_FACTOR = 0x0000000a, /* len: 2B */ + FPGA_LTE_FEC_RING_DESC_LEN = 0x0000000c, /* len: 2B */ + FPGA_LTE_FEC_FLR_TIME_OUT = 0x0000000e, /* len: 2B */ + FPGA_LTE_FEC_VFQ_FLUSH_STATUS_LW = 0x00000018, /* len: 4B */ + FPGA_LTE_FEC_VFQ_FLUSH_STATUS_HI = 0x0000001c, /* len: 4B */ + FPGA_LTE_FEC_VF0_DEBUG = 0x00000020, /* len: 4B */ + FPGA_LTE_FEC_VF1_DEBUG = 0x00000024, /* len: 4B */ + 
FPGA_LTE_FEC_VF2_DEBUG = 0x00000028, /* len: 4B */ + FPGA_LTE_FEC_VF3_DEBUG = 0x0000002c, /* len: 4B */ + FPGA_LTE_FEC_VF4_DEBUG = 0x00000030, /* len: 4B */ + FPGA_LTE_FEC_VF5_DEBUG = 0x00000034, /* len: 4B */ + FPGA_LTE_FEC_VF6_DEBUG = 0x00000038, /* len: 4B */ + FPGA_LTE_FEC_VF7_DEBUG = 0x0000003c, /* len: 4B */ + FPGA_LTE_FEC_QUEUE_MAP = 0x00000040, /* len: 256B */ + FPGA_LTE_FEC_RING_CTRL_REGS = 0x00000200 /* len: 2048B */ +}; + +/* FPGA LTE FEC Ring Control Registers */ +enum { + FPGA_LTE_FEC_RING_HEAD_ADDR = 0x00000008, + FPGA_LTE_FEC_RING_SIZE = 0x00000010, + FPGA_LTE_FEC_RING_MISC = 0x00000014, + FPGA_LTE_FEC_RING_ENABLE = 0x00000015, + FPGA_LTE_FEC_RING_FLUSH_QUEUE_EN = 0x00000016, + FPGA_LTE_FEC_RING_SHADOW_TAIL = 0x00000018, + FPGA_LTE_FEC_RING_HEAD_POINT = 0x0000001C +}; + +/* FPGA LTE FEC DESCRIPTOR ERROR */ +enum { + DESC_ERR_NO_ERR = 0x0, + DESC_ERR_K_OUT_OF_RANGE = 0x1, + DESC_ERR_K_NOT_NORMAL = 0x2, + DESC_ERR_KPAI_NOT_NORMAL = 0x3, + DESC_ERR_DESC_OFFSET_ERR = 0x4, + DESC_ERR_DESC_READ_FAIL = 0x8, + DESC_ERR_DESC_READ_TIMEOUT = 0x9, + DESC_ERR_DESC_READ_TLP_POISONED = 0xA, + DESC_ERR_CB_READ_FAIL = 0xC, + DESC_ERR_CB_READ_TIMEOUT = 0xD, + DESC_ERR_CB_READ_TLP_POISONED = 0xE +}; + +/* FPGA LTE FEC DMA Encoding Request Descriptor */ +struct __rte_packed fpga_dma_enc_desc { + uint32_t done:1, + rsrvd0:11, + error:4, + rsrvd1:16; + uint32_t ncb:16, + rsrvd2:14, + rv:2; + uint32_t bypass_rm:1, + irq_en:1, + crc_en:1, + rsrvd3:13, + offset:10, + rsrvd4:6; + uint16_t e; + uint16_t k; + uint32_t out_addr_lw; + uint32_t out_addr_hi; + uint32_t in_addr_lw; + uint32_t in_addr_hi; + + union { + struct { + /* Virtual addresses used to retrieve SW context info */ + void *op_addr; + /* Stores information about total number of Code Blocks + * in currently processed Transport Block + */ + uint64_t cbs_in_op; + }; + + uint8_t sw_ctxt[FPGA_RING_DESC_LEN_UNIT_BYTES * + (FPGA_RING_DESC_ENTRY_LENGTH - 1)]; + }; +}; + +/* FPGA LTE FEC DMA Decoding Request Descriptor */ 
+struct __rte_packed fpga_dma_dec_desc { + uint32_t done:1, + iter:5, + rsrvd0:2, + crc_pass:1, + rsrvd1:3, + error:4, + crc_type:1, + rsrvd2:7, + max_iter:5, + rsrvd3:3; + uint32_t rsrvd4; + uint32_t bypass_rm:1, + irq_en:1, + drop_crc:1, + rsrvd5:13, + offset:10, + rsrvd6:6; + uint16_t k; + uint16_t in_len; + uint32_t out_addr_lw; + uint32_t out_addr_hi; + uint32_t in_addr_lw; + uint32_t in_addr_hi; + + union { + struct { + /* Virtual addresses used to retrieve SW context info */ + void *op_addr; + /* Stores information about total number of Code Blocks + * in currently processed Transport Block + */ + uint8_t cbs_in_op; + }; + + uint32_t sw_ctxt[8 * (FPGA_RING_DESC_ENTRY_LENGTH - 1)]; + }; +}; + +/* FPGA LTE DMA Descriptor */ +union fpga_dma_desc { + struct fpga_dma_enc_desc enc_req; + struct fpga_dma_dec_desc dec_req; +}; + +/* FPGA LTE FEC Ring Control Register */ +struct __rte_packed fpga_ring_ctrl_reg { + uint64_t ring_base_addr; + uint64_t ring_head_addr; + uint16_t ring_size:11; + uint16_t rsrvd0; + union { /* Miscellaneous register */ + uint8_t misc; + uint8_t max_ul_dec:5, + max_ul_dec_en:1, + rsrvd1:2; + }; + uint8_t enable; + uint8_t flush_queue_en; + uint8_t rsrvd2; + uint16_t shadow_tail; + uint16_t rsrvd3; + uint16_t head_point; + uint16_t rsrvd4; + +}; + +/* Private data structure for each FPGA FEC device */ +struct fpga_lte_fec_device { + /** Base address of MMIO registers (BAR0) */ + void *mmio_base; + /** Base address of memory for sw rings */ + void *sw_rings; + /** Physical address of sw_rings */ + rte_iova_t sw_rings_phys; + /** Number of bytes available for each queue in device. 
*/ + uint32_t sw_ring_size; + /** Max number of entries available for each queue in device */ + uint32_t sw_ring_max_depth; + /** Base address of response tail pointer buffer */ + uint32_t *tail_ptrs; + /** Physical address of tail pointers */ + rte_iova_t tail_ptr_phys; + /** Queues flush completion flag */ + uint64_t *flush_queue_status; + /* Bitmap capturing which Queues are bound to the PF/VF */ + uint64_t q_bound_bit_map; + /* Bitmap capturing which Queues have already been assigned */ + uint64_t q_assigned_bit_map; + /** True if this is a PF FPGA FEC device */ + bool pf_device; +}; + +/* Structure associated with each queue. */ +struct __rte_cache_aligned fpga_queue { + struct fpga_ring_ctrl_reg ring_ctrl_reg; /* Ring Control Register */ + union fpga_dma_desc *ring_addr; /* Virtual address of software ring */ + uint64_t *ring_head_addr; /* Virtual address of completion_head */ + uint64_t shadow_completion_head; /* Shadow completion head value */ + uint16_t head_free_desc; /* Ring head */ + uint16_t tail; /* Ring tail */ + /* Mask used to wrap enqueued descriptors on the sw ring */ + uint32_t sw_ring_wrap_mask; + uint32_t irq_enable; /* Enable ops dequeue interrupts if set to 1 */ + uint8_t q_idx; /* Queue index */ + struct fpga_lte_fec_device *d; + /* MMIO register of shadow_tail used to enqueue descriptors */ + void *shadow_tail_addr; +}; + +/* Write to 16 bit MMIO register address */ +static inline void +mmio_write_16(void *addr, uint16_t value) +{ + *((volatile uint16_t *)(addr)) = rte_cpu_to_le_16(value); +} + +/* Write to 32 bit MMIO register address */ +static inline void +mmio_write_32(void *addr, uint32_t value) +{ + *((volatile uint32_t *)(addr)) = rte_cpu_to_le_32(value); +} + +/* Write to 64 bit MMIO register address */ +static inline void +mmio_write_64(void *addr, uint64_t value) +{ + *((volatile uint64_t *)(addr)) = rte_cpu_to_le_64(value); +} + +/* Write a 8 bit register of a FPGA LTE FEC device */ +static inline void +fpga_reg_write_8(void 
*mmio_base, uint32_t offset, uint8_t payload) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + *((volatile uint8_t *)(reg_addr)) = payload; +} + +/* Write a 16 bit register of a FPGA LTE FEC device */ +static inline void +fpga_reg_write_16(void *mmio_base, uint32_t offset, uint16_t payload) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + mmio_write_16(reg_addr, payload); +} + +/* Write a 32 bit register of a FPGA LTE FEC device */ +static inline void +fpga_reg_write_32(void *mmio_base, uint32_t offset, uint32_t payload) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + mmio_write_32(reg_addr, payload); +} + +/* Write a 64 bit register of a FPGA LTE FEC device */ +static inline void +fpga_reg_write_64(void *mmio_base, uint32_t offset, uint64_t payload) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + mmio_write_64(reg_addr, payload); +} + +/* Write a ring control register of a FPGA LTE FEC device */ +static inline void +fpga_ring_reg_write(void *mmio_base, uint32_t offset, + struct fpga_ring_ctrl_reg payload) +{ + fpga_reg_write_64(mmio_base, offset, payload.ring_base_addr); + fpga_reg_write_64(mmio_base, offset + FPGA_LTE_FEC_RING_HEAD_ADDR, + payload.ring_head_addr); + fpga_reg_write_16(mmio_base, offset + FPGA_LTE_FEC_RING_SIZE, + payload.ring_size); + fpga_reg_write_16(mmio_base, offset + FPGA_LTE_FEC_RING_HEAD_POINT, + payload.head_point); + fpga_reg_write_8(mmio_base, offset + FPGA_LTE_FEC_RING_FLUSH_QUEUE_EN, + payload.flush_queue_en); + fpga_reg_write_16(mmio_base, offset + FPGA_LTE_FEC_RING_SHADOW_TAIL, + payload.shadow_tail); + fpga_reg_write_8(mmio_base, offset + FPGA_LTE_FEC_RING_MISC, + payload.misc); + fpga_reg_write_8(mmio_base, offset + FPGA_LTE_FEC_RING_ENABLE, + payload.enable); +} + +/* Read a register of FPGA LTE FEC device */ +static uint32_t +fpga_reg_read_32(void *mmio_base, uint32_t offset) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + uint32_t ret = *((volatile uint32_t *)(reg_addr)); + return 
rte_le_to_cpu_32(ret); +} + +#ifdef RTE_LIBRTE_BBDEV_DEBUG +/* Read a register of FPGA LTE FEC device */ +static uint8_t +fpga_reg_read_8(void *mmio_base, uint32_t offset) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + return *((volatile uint8_t *)(reg_addr)); +} + +/* Read a register of FPGA LTE FEC device */ +static uint16_t +fpga_reg_read_16(void *mmio_base, uint32_t offset) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + uint16_t ret = *((volatile uint16_t *)(reg_addr)); + return rte_le_to_cpu_16(ret); +} + +/* Read a register of FPGA LTE FEC device */ +static uint64_t +fpga_reg_read_64(void *mmio_base, uint32_t offset) +{ + void *reg_addr = RTE_PTR_ADD(mmio_base, offset); + uint64_t ret = *((volatile uint64_t *)(reg_addr)); + return rte_le_to_cpu_64(ret); +} + +/* Read Ring Control Register of FPGA LTE FEC device */ +static inline void +print_ring_reg_debug_info(void *mmio_base, uint32_t offset) +{ + rte_bbdev_log_debug( + "FPGA MMIO base address @ %p | Ring Control Register @ offset = 0x%08" + PRIx32, mmio_base, offset); + rte_bbdev_log_debug( + "RING_BASE_ADDR = 0x%016"PRIx64, + fpga_reg_read_64(mmio_base, offset)); + rte_bbdev_log_debug( + "RING_HEAD_ADDR = 0x%016"PRIx64, + fpga_reg_read_64(mmio_base, offset + + FPGA_LTE_FEC_RING_HEAD_ADDR)); + rte_bbdev_log_debug( + "RING_SIZE = 0x%04"PRIx16, + fpga_reg_read_16(mmio_base, offset + + FPGA_LTE_FEC_RING_SIZE)); + rte_bbdev_log_debug( + "RING_MISC = 0x%02"PRIx8, + fpga_reg_read_8(mmio_base, offset + + FPGA_LTE_FEC_RING_MISC)); + rte_bbdev_log_debug( + "RING_ENABLE = 0x%02"PRIx8, + fpga_reg_read_8(mmio_base, offset + + FPGA_LTE_FEC_RING_ENABLE)); + rte_bbdev_log_debug( + "RING_FLUSH_QUEUE_EN = 0x%02"PRIx8, + fpga_reg_read_8(mmio_base, offset + + FPGA_LTE_FEC_RING_FLUSH_QUEUE_EN)); + rte_bbdev_log_debug( + "RING_SHADOW_TAIL = 0x%04"PRIx16, + fpga_reg_read_16(mmio_base, offset + + FPGA_LTE_FEC_RING_SHADOW_TAIL)); + rte_bbdev_log_debug( + "RING_HEAD_POINT = 0x%04"PRIx16, + 
fpga_reg_read_16(mmio_base, offset + + FPGA_LTE_FEC_RING_HEAD_POINT)); +} + +/* Read Static Register of FPGA LTE FEC device */ +static inline void +print_static_reg_debug_info(void *mmio_base) +{ + uint16_t config = fpga_reg_read_16(mmio_base, + FPGA_LTE_FEC_CONFIGURATION); + uint8_t qmap_done = fpga_reg_read_8(mmio_base, + FPGA_LTE_FEC_QUEUE_PF_VF_MAP_DONE); + uint16_t lb_factor = fpga_reg_read_16(mmio_base, + FPGA_LTE_FEC_LOAD_BALANCE_FACTOR); + uint16_t ring_desc_len = fpga_reg_read_16(mmio_base, + FPGA_LTE_FEC_RING_DESC_LEN); + uint16_t flr_time_out = fpga_reg_read_16(mmio_base, + FPGA_LTE_FEC_FLR_TIME_OUT); + + rte_bbdev_log_debug("UL.DL Weights = %u.%u", + ((uint8_t)config), ((uint8_t)(config >> 8))); + rte_bbdev_log_debug("UL.DL Load Balance = %u.%u", + ((uint8_t)lb_factor), ((uint8_t)(lb_factor >> 8))); + rte_bbdev_log_debug("Queue-PF/VF Mapping Table = %s", + (qmap_done > 0) ? "READY" : "NOT-READY"); + rte_bbdev_log_debug("Ring Descriptor Size = %u bytes", + ring_desc_len*FPGA_RING_DESC_LEN_UNIT_BYTES); + rte_bbdev_log_debug("FLR Timeout = %f usec", + (float)flr_time_out*FPGA_FLR_TIMEOUT_UNIT); +} + +/* Print decode DMA Descriptor of FPGA LTE FEC device */ +static void +print_dma_dec_desc_debug_info(union fpga_dma_desc *desc) +{ + rte_bbdev_log_debug("DMA response desc %p\n" + "\t-- done(%"PRIu32") | iter(%"PRIu32") | crc_pass(%"PRIu32")" + " | error (%"PRIu32") | crc_type(%"PRIu32")\n" + "\t-- max_iter(%"PRIu32") | bypass_rm(%"PRIu32") | " + "irq_en (%"PRIu32") | drop_crc(%"PRIu32") | offset(%"PRIu32")\n" + "\t-- k(%"PRIu32") | in_len (%"PRIu16") | op_add(%p)\n" + "\t-- cbs_in_op(%"PRIu32") | in_add (0x%08"PRIx32"%08"PRIx32") | " + "out_add (0x%08"PRIx32"%08"PRIx32")", + desc, + (uint32_t)desc->dec_req.done, + (uint32_t)desc->dec_req.iter, + (uint32_t)desc->dec_req.crc_pass, + (uint32_t)desc->dec_req.error, + (uint32_t)desc->dec_req.crc_type, + (uint32_t)desc->dec_req.max_iter, + (uint32_t)desc->dec_req.bypass_rm, + (uint32_t)desc->dec_req.irq_en, + 
(uint32_t)desc->dec_req.drop_crc, + (uint32_t)desc->dec_req.offset, + (uint32_t)desc->dec_req.k, + (uint16_t)desc->dec_req.in_len, + desc->dec_req.op_addr, + (uint32_t)desc->dec_req.cbs_in_op, + (uint32_t)desc->dec_req.in_addr_hi, + (uint32_t)desc->dec_req.in_addr_lw, + (uint32_t)desc->dec_req.out_addr_hi, + (uint32_t)desc->dec_req.out_addr_lw); +} +#endif + +static int +fpga_setup_queues(struct rte_bbdev *dev, uint16_t num_queues, int socket_id) +{ + /* Number of queues bound to a PF/VF */ + uint32_t hw_q_num = 0; + uint32_t ring_size, payload, address, q_id, offset; + rte_iova_t phys_addr; + struct fpga_ring_ctrl_reg ring_reg; + struct fpga_lte_fec_device *fpga_dev = dev->data->dev_private; + + address = FPGA_LTE_FEC_QUEUE_PF_VF_MAP_DONE; + if (!(fpga_reg_read_32(fpga_dev->mmio_base, address) & 0x1)) { + rte_bbdev_log(ERR, + "Queue-PF/VF mapping is not set! Was PF configured for device (%s) ?", + dev->data->name); + return -EPERM; + } + + /* Clear queue registers structure */ + memset(&ring_reg, 0, sizeof(struct fpga_ring_ctrl_reg)); + + /* Scan queue map. + * If a queue is valid and mapped to a calling PF/VF the read value is + * replaced with a queue ID and if it's not then + * FPGA_INVALID_HW_QUEUE_ID is returned. + */ + for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { + uint32_t hw_q_id = fpga_reg_read_32(fpga_dev->mmio_base, + FPGA_LTE_FEC_QUEUE_MAP + (q_id << 2)); + + rte_bbdev_log_debug("%s: queue ID: %u, registry queue ID: %u", + dev->device->name, q_id, hw_q_id); + + if (hw_q_id != FPGA_INVALID_HW_QUEUE_ID) { + fpga_dev->q_bound_bit_map |= (1ULL << q_id); + /* Clear queue register of found queue */ + offset = FPGA_LTE_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q_id); + fpga_ring_reg_write(fpga_dev->mmio_base, + offset, ring_reg); + ++hw_q_num; + } + } + if (hw_q_num == 0) { + rte_bbdev_log(ERR, + "No HW queues assigned to this device. Probably this is a VF configured for PF mode. 
Check device configuration!"); + return -ENODEV; + } + + if (num_queues > hw_q_num) { + rte_bbdev_log(ERR, + "Not enough queues for device %s! Requested: %u, available: %u", + dev->device->name, num_queues, hw_q_num); + return -EINVAL; + } + + ring_size = FPGA_RING_MAX_SIZE * sizeof(struct fpga_dma_dec_desc); + + /* Enforce 32 byte alignment */ + RTE_BUILD_BUG_ON((RTE_CACHE_LINE_SIZE % 32) != 0); + + /* Allocate memory for SW descriptor rings */ + fpga_dev->sw_rings = rte_zmalloc_socket(dev->device->driver->name, + num_queues * ring_size, RTE_CACHE_LINE_SIZE, + socket_id); + if (fpga_dev->sw_rings == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate memory for %s:%u sw_rings", + dev->device->driver->name, dev->data->dev_id); + return -ENOMEM; + } + + fpga_dev->sw_rings_phys = rte_malloc_virt2iova(fpga_dev->sw_rings); + fpga_dev->sw_ring_size = ring_size; + fpga_dev->sw_ring_max_depth = FPGA_RING_MAX_SIZE; + + /* Allocate memory for ring flush status */ + fpga_dev->flush_queue_status = rte_zmalloc_socket(NULL, + sizeof(uint64_t), RTE_CACHE_LINE_SIZE, socket_id); + if (fpga_dev->flush_queue_status == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate memory for %s:%u flush_queue_status", + dev->device->driver->name, dev->data->dev_id); + return -ENOMEM; + } + + /* Set the flush status address registers */ + phys_addr = rte_malloc_virt2iova(fpga_dev->flush_queue_status); + + address = FPGA_LTE_FEC_VFQ_FLUSH_STATUS_LW; + payload = (uint32_t)(phys_addr); + fpga_reg_write_32(fpga_dev->mmio_base, address, payload); + + address = FPGA_LTE_FEC_VFQ_FLUSH_STATUS_HI; + payload = (uint32_t)(phys_addr >> 32); + fpga_reg_write_32(fpga_dev->mmio_base, address, payload); + + return 0; +} + +static int +fpga_dev_close(struct rte_bbdev *dev) +{ + struct fpga_lte_fec_device *fpga_dev = dev->data->dev_private; + + rte_free(fpga_dev->sw_rings); + rte_free(fpga_dev->flush_queue_status); + + return 0; +} + +static void +fpga_dev_info_get(struct rte_bbdev *dev, + struct 
rte_bbdev_driver_info *dev_info) +{ + struct fpga_lte_fec_device *d = dev->data->dev_private; + uint32_t q_id = 0; + + /* TODO RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN and numbers of buffers are set + * to temporary values as they are required by test application while + * validation phase. + */ + static const struct rte_bbdev_op_cap bbdev_capabilities[] = { + { + .type = RTE_BBDEV_OP_TURBO_DEC, + .cap.turbo_dec = { + .capability_flags = + RTE_BBDEV_TURBO_CRC_TYPE_24B | + RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | + RTE_BBDEV_TURBO_DEC_INTERRUPTS | + RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | + RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP, + .max_llr_modulus = INT8_MAX, + .num_buffers_src = + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, + .num_buffers_hard_out = + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, + .num_buffers_soft_out = 0 + } + }, + { + .type = RTE_BBDEV_OP_TURBO_ENC, + .cap.turbo_enc = { + .capability_flags = + RTE_BBDEV_TURBO_CRC_24B_ATTACH | + RTE_BBDEV_TURBO_RATE_MATCH | + RTE_BBDEV_TURBO_ENC_INTERRUPTS, + .num_buffers_src = + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, + .num_buffers_dst = + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS + } + }, + RTE_BBDEV_END_OF_CAPABILITIES_LIST() + }; + + static struct rte_bbdev_queue_conf default_queue_conf; + default_queue_conf.socket = dev->data->socket_id; + default_queue_conf.queue_size = FPGA_RING_MAX_SIZE; + + + dev_info->driver_name = dev->device->driver->name; + dev_info->queue_size_lim = FPGA_RING_MAX_SIZE; + dev_info->hardware_accelerated = true; + dev_info->min_alignment = 64; + dev_info->default_queue_conf = default_queue_conf; + dev_info->capabilities = bbdev_capabilities; + dev_info->cpu_flag_reqs = NULL; + + /* Calculates number of queues assigned to device */ + dev_info->max_num_queues = 0; + for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { + uint32_t hw_q_id = fpga_reg_read_32(d->mmio_base, + FPGA_LTE_FEC_QUEUE_MAP + (q_id << 2)); + if (hw_q_id != FPGA_INVALID_HW_QUEUE_ID) + dev_info->max_num_queues++; + } +} + +/** + * Find index of queue bound to current 
PF/VF which is unassigned. Return -1 + * when there is no available queue + */ +static int +fpga_find_free_queue_idx(struct rte_bbdev *dev, + const struct rte_bbdev_queue_conf *conf) +{ + struct fpga_lte_fec_device *d = dev->data->dev_private; + uint64_t q_idx; + uint8_t i = 0; + uint8_t range = FPGA_TOTAL_NUM_QUEUES >> 1; + + if (conf->op_type == RTE_BBDEV_OP_TURBO_ENC) { + i = FPGA_NUM_DL_QUEUES; + range = FPGA_TOTAL_NUM_QUEUES; + } + + for (; i < range; ++i) { + q_idx = 1ULL << i; + /* Check if index of queue is bound to current PF/VF */ + if (d->q_bound_bit_map & q_idx) + /* Check if found queue was not already assigned */ + if (!(d->q_assigned_bit_map & q_idx)) { + d->q_assigned_bit_map |= q_idx; + return i; + } + } + + rte_bbdev_log(INFO, "Failed to find free queue on %s", dev->data->name); + + return -1; +} + +static int +fpga_queue_setup(struct rte_bbdev *dev, uint16_t queue_id, + const struct rte_bbdev_queue_conf *conf) +{ + uint32_t address, ring_offset; + struct fpga_lte_fec_device *d = dev->data->dev_private; + struct fpga_queue *q; + int8_t q_idx; + + /* Check if there is a free queue to assign */ + q_idx = fpga_find_free_queue_idx(dev, conf); + if (q_idx == -1) + return -1; + + /* Allocate the queue data structure. 
*/ + q = rte_zmalloc_socket(dev->device->driver->name, sizeof(*q), + RTE_CACHE_LINE_SIZE, conf->socket); + if (q == NULL) { + /* Mark queue as un-assigned */ + d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); + rte_bbdev_log(ERR, "Failed to allocate queue memory"); + return -ENOMEM; + } + + q->d = d; + q->q_idx = q_idx; + + /* Set ring_base_addr */ + q->ring_addr = RTE_PTR_ADD(d->sw_rings, (d->sw_ring_size * queue_id)); + q->ring_ctrl_reg.ring_base_addr = d->sw_rings_phys + + (d->sw_ring_size * queue_id); + + /* Allocate memory for Completion Head variable*/ + q->ring_head_addr = rte_zmalloc_socket(dev->device->driver->name, + sizeof(uint64_t), RTE_CACHE_LINE_SIZE, conf->socket); + if (q->ring_head_addr == NULL) { + /* Mark queue as un-assigned */ + d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); + rte_free(q); + rte_bbdev_log(ERR, + "Failed to allocate memory for %s:%u completion_head", + dev->device->driver->name, dev->data->dev_id); + return -ENOMEM; + } + /* Set ring_head_addr */ + q->ring_ctrl_reg.ring_head_addr = + rte_malloc_virt2iova(q->ring_head_addr); + + /* Clear shadow_completion_head */ + q->shadow_completion_head = 0; + + /* Set ring_size */ + if (conf->queue_size > FPGA_RING_MAX_SIZE) { + /* Mark queue as un-assigned */ + d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q_idx)); + rte_free(q->ring_head_addr); + rte_free(q); + rte_bbdev_log(ERR, + "Size of queue is too big %d (MAX: %d ) for %s:%u", + conf->queue_size, FPGA_RING_MAX_SIZE, + dev->device->driver->name, dev->data->dev_id); + return -EINVAL; + } + q->ring_ctrl_reg.ring_size = conf->queue_size; + + /* Set Miscellaneous FPGA register*/ + /* Max iteration number for TTI mitigation - todo */ + q->ring_ctrl_reg.max_ul_dec = 0; + /* Enable max iteration number for TTI - todo */ + q->ring_ctrl_reg.max_ul_dec_en = 0; + + /* Enable the ring */ + q->ring_ctrl_reg.enable = 1; + + /* Set FPGA head_point and tail registers */ + q->ring_ctrl_reg.head_point = q->tail = 0; + + /* Set FPGA 
shadow_tail register */ + q->ring_ctrl_reg.shadow_tail = q->tail; + + /* Calculates the ring offset for found queue */ + ring_offset = FPGA_LTE_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q_idx); + + /* Set FPGA Ring Control Registers */ + fpga_ring_reg_write(d->mmio_base, ring_offset, q->ring_ctrl_reg); + + /* Store MMIO register of shadow_tail */ + address = ring_offset + FPGA_LTE_FEC_RING_SHADOW_TAIL; + q->shadow_tail_addr = RTE_PTR_ADD(d->mmio_base, address); + + q->head_free_desc = q->tail; + + /* Set wrap mask */ + q->sw_ring_wrap_mask = conf->queue_size - 1; + + rte_bbdev_log_debug("Setup dev%u q%u: queue_idx=%u", + dev->data->dev_id, queue_id, q->q_idx); + + dev->data->queues[queue_id].queue_private = q; + + rte_bbdev_log_debug("BBDEV queue[%d] set up for FPGA queue[%d]", + queue_id, q_idx); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + /* Read FPGA Ring Control Registers after configuration*/ + print_ring_reg_debug_info(d->mmio_base, ring_offset); +#endif + return 0; +} + +static int +fpga_queue_release(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_lte_fec_device *d = dev->data->dev_private; + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + struct fpga_ring_ctrl_reg ring_reg; + uint32_t offset; + + rte_bbdev_log_debug("FPGA Queue[%d] released", queue_id); + + if (q != NULL) { + memset(&ring_reg, 0, sizeof(struct fpga_ring_ctrl_reg)); + offset = FPGA_LTE_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); + /* Disable queue */ + fpga_reg_write_8(d->mmio_base, + offset + FPGA_LTE_FEC_RING_ENABLE, 0x00); + /* Clear queue registers */ + fpga_ring_reg_write(d->mmio_base, offset, ring_reg); + + /* Mark the Queue as un-assigned */ + d->q_assigned_bit_map &= (0xFFFFFFFF - (1ULL << q->q_idx)); + rte_free(q->ring_head_addr); + rte_free(q); + dev->data->queues[queue_id].queue_private = NULL; + } + + return 0; +} + +/* Function starts a device queue. 
*/ +static int +fpga_queue_start(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_lte_fec_device *d = dev->data->dev_private; +#ifdef RTE_LIBRTE_BBDEV_DEBUG + if (d == NULL) { + rte_bbdev_log(ERR, "Invalid device pointer"); + return -1; + } +#endif + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + uint32_t offset = FPGA_LTE_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); + uint8_t enable = 0x01; + uint16_t zero = 0x0000; + + /* Clear queue head and tail variables */ + q->tail = q->head_free_desc = 0; + + /* Clear FPGA head_point and tail registers */ + fpga_reg_write_16(d->mmio_base, offset + FPGA_LTE_FEC_RING_HEAD_POINT, + zero); + fpga_reg_write_16(d->mmio_base, offset + FPGA_LTE_FEC_RING_SHADOW_TAIL, + zero); + + /* Enable queue */ + fpga_reg_write_8(d->mmio_base, offset + FPGA_LTE_FEC_RING_ENABLE, + enable); + + rte_bbdev_log_debug("FPGA Queue[%d] started", queue_id); + return 0; +} + +/* Function stops a device queue. */ +static int +fpga_queue_stop(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_lte_fec_device *d = dev->data->dev_private; +#ifdef RTE_LIBRTE_BBDEV_DEBUG + if (d == NULL) { + rte_bbdev_log(ERR, "Invalid device pointer"); + return -1; + } +#endif + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + uint32_t offset = FPGA_LTE_FEC_RING_CTRL_REGS + + (sizeof(struct fpga_ring_ctrl_reg) * q->q_idx); + uint8_t payload = 0x01; + uint8_t counter = 0; + uint8_t timeout = FPGA_QUEUE_FLUSH_TIMEOUT_US / + FPGA_TIMEOUT_CHECK_INTERVAL; + + /* Set flush_queue_en bit to trigger queue flushing */ + fpga_reg_write_8(d->mmio_base, + offset + FPGA_LTE_FEC_RING_FLUSH_QUEUE_EN, payload); + + /** Check if queue flush is completed. + * FPGA will update the completion flag after queue flushing is + * completed. If completion flag is not updated within 1ms it is + * considered as a failure. 
+ */ + while (!(*((volatile uint8_t *)d->flush_queue_status + q->q_idx) & payload)) { + if (counter > timeout) { + rte_bbdev_log(ERR, "FPGA Queue Flush failed for queue %d", + queue_id); + return -1; + } + usleep(FPGA_TIMEOUT_CHECK_INTERVAL); + counter++; + } + + /* Disable queue */ + payload = 0x00; + fpga_reg_write_8(d->mmio_base, offset + FPGA_LTE_FEC_RING_ENABLE, + payload); + + rte_bbdev_log_debug("FPGA Queue[%d] stopped", queue_id); + return 0; +} + +static inline uint16_t +get_queue_id(struct rte_bbdev_data *data, uint8_t q_idx) +{ + uint16_t queue_id; + + for (queue_id = 0; queue_id < data->num_queues; ++queue_id) { + struct fpga_queue *q = data->queues[queue_id].queue_private; + if (q != NULL && q->q_idx == q_idx) + return queue_id; + } + + return -1; +} + +/* Interrupt handler triggered by FPGA dev for handling specific interrupt */ +static void +fpga_dev_interrupt_handler(void *cb_arg) +{ + struct rte_bbdev *dev = cb_arg; + struct fpga_lte_fec_device *fpga_dev = dev->data->dev_private; + struct fpga_queue *q; + uint64_t ring_head; + uint64_t q_idx; + uint16_t queue_id; + uint8_t i; + + /* Scan queue assigned to this device */ + for (i = 0; i < FPGA_TOTAL_NUM_QUEUES; ++i) { + q_idx = 1ULL << i; + if (fpga_dev->q_bound_bit_map & q_idx) { + queue_id = get_queue_id(dev->data, i); + if (queue_id == (uint16_t) -1) + continue; + + /* Check if completion head was changed */ + q = dev->data->queues[queue_id].queue_private; + ring_head = *q->ring_head_addr; + if (q->shadow_completion_head != ring_head && + q->irq_enable == 1) { + q->shadow_completion_head = ring_head; + rte_bbdev_pmd_callback_process( + dev, + RTE_BBDEV_EVENT_DEQUEUE, + &queue_id); + } + } + } +} + +static int +fpga_queue_intr_enable(struct rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + + if (!rte_intr_cap_multiple(dev->intr_handle)) + return -ENOTSUP; + + q->irq_enable = 1; + + return 0; +} + +static int +fpga_queue_intr_disable(struct 
rte_bbdev *dev, uint16_t queue_id) +{ + struct fpga_queue *q = dev->data->queues[queue_id].queue_private; + q->irq_enable = 0; + + return 0; +} + +static int +fpga_intr_enable(struct rte_bbdev *dev) +{ + int ret; + uint8_t i; + + if (!rte_intr_cap_multiple(dev->intr_handle)) { + rte_bbdev_log(ERR, "Multiple intr vector is not supported by FPGA (%s)", + dev->data->name); + return -ENOTSUP; + } + + /* Create event file descriptors for each of 64 queue. Event fds will be + * mapped to FPGA IRQs in rte_intr_enable(). This is a 1:1 mapping where + * the IRQ number is a direct translation to the queue number. + * + * 63 (FPGA_NUM_INTR_VEC) event fds are created as rte_intr_enable() + * mapped the first IRQ to already created interrupt event file + * descriptor (intr_handle->fd). + */ + if (rte_intr_efd_enable(dev->intr_handle, FPGA_NUM_INTR_VEC)) { + rte_bbdev_log(ERR, "Failed to create fds for %u queues", + dev->data->num_queues); + return -1; + } + + /* TODO Each event file descriptor is overwritten by interrupt event + * file descriptor. That descriptor is added to epoll observed list. + * It ensures that callback function assigned to that descriptor will + * invoked when any FPGA queue issues interrupt. 
+ */ + for (i = 0; i < FPGA_NUM_INTR_VEC; ++i) + dev->intr_handle->efds[i] = dev->intr_handle->fd; + + if (!dev->intr_handle->intr_vec) { + dev->intr_handle->intr_vec = rte_zmalloc("intr_vec", + dev->data->num_queues * sizeof(int), 0); + if (!dev->intr_handle->intr_vec) { + rte_bbdev_log(ERR, "Failed to allocate %u vectors", + dev->data->num_queues); + return -ENOMEM; + } + } + + ret = rte_intr_enable(dev->intr_handle); + if (ret < 0) { + rte_bbdev_log(ERR, + "Couldn't enable interrupts for device: %s", + dev->data->name); + return ret; + } + + ret = rte_intr_callback_register(dev->intr_handle, + fpga_dev_interrupt_handler, dev); + if (ret < 0) { + rte_bbdev_log(ERR, + "Couldn't register interrupt callback for device: %s", + dev->data->name); + return ret; + } + + return 0; +} + +static const struct rte_bbdev_ops fpga_ops = { + .setup_queues = fpga_setup_queues, + .intr_enable = fpga_intr_enable, + .close = fpga_dev_close, + .info_get = fpga_dev_info_get, + .queue_setup = fpga_queue_setup, + .queue_stop = fpga_queue_stop, + .queue_start = fpga_queue_start, + .queue_release = fpga_queue_release, + .queue_intr_enable = fpga_queue_intr_enable, + .queue_intr_disable = fpga_queue_intr_disable +}; + +static inline void +fpga_dma_enqueue(struct fpga_queue *q, uint16_t num_desc, + struct rte_bbdev_stats *queue_stats) +{ +#ifdef RTE_BBDEV_OFFLOAD_COST + uint64_t start_time = 0; + queue_stats->acc_offload_cycles = 0; +#else + RTE_SET_USED(queue_stats); +#endif + + /* Update tail and shadow_tail register */ + q->tail = (q->tail + num_desc) & q->sw_ring_wrap_mask; + + rte_wmb(); + +#ifdef RTE_BBDEV_OFFLOAD_COST + /* Start time measurement for enqueue function offload. */ + start_time = rte_rdtsc_precise(); +#endif + mmio_write_16(q->shadow_tail_addr, q->tail); + +#ifdef RTE_BBDEV_OFFLOAD_COST + rte_wmb(); + queue_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif +} + +/* Calculates number of CBs in processed encoder TB based on 'r' and input + * length. 
+ */ +static inline uint8_t +get_num_cbs_in_op_enc(struct rte_bbdev_op_turbo_enc *turbo_enc) +{ + uint8_t c, c_neg, r, crc24_bits = 0; + uint16_t k, k_neg, k_pos; + uint8_t cbs_in_op = 0; + int32_t length; + + length = turbo_enc->input.length; + r = turbo_enc->tb_params.r; + c = turbo_enc->tb_params.c; + c_neg = turbo_enc->tb_params.c_neg; + k_neg = turbo_enc->tb_params.k_neg; + k_pos = turbo_enc->tb_params.k_pos; + crc24_bits = 24; + while (length > 0 && r < c) { + k = (r < c_neg) ? k_neg : k_pos; + length -= (k - crc24_bits) >> 3; + r++; + cbs_in_op++; + } + + return cbs_in_op; +} + +/* Calculates number of CBs in processed decoder TB based on 'r' and input + * length. + */ +static inline uint16_t +get_num_cbs_in_op_dec(struct rte_bbdev_op_turbo_dec *turbo_dec) +{ + uint8_t c, c_neg, r = 0; + uint16_t kw, k, k_neg, k_pos, cbs_in_op = 0; + int32_t length; + + length = turbo_dec->input.length; + r = turbo_dec->tb_params.r; + c = turbo_dec->tb_params.c; + c_neg = turbo_dec->tb_params.c_neg; + k_neg = turbo_dec->tb_params.k_neg; + k_pos = turbo_dec->tb_params.k_pos; + while (length > 0 && r < c) { + k = (r < c_neg) ? k_neg : k_pos; + kw = RTE_ALIGN_CEIL(k + 4, 32) * 3; + length -= kw; + r++; + cbs_in_op++; + } + + return cbs_in_op; +} + +/* Read flag value 0/1/ from bitmap */ +static inline bool +check_bit(uint32_t bitmap, uint32_t bitmask) +{ + return bitmap & bitmask; +} + +/* Print an error if a descriptor error has occurred. 
+ * Return 0 on success, 1 on failure + */ +static inline int +check_desc_error(uint32_t error_code) { + switch (error_code) { + case DESC_ERR_NO_ERR: + return 0; + case DESC_ERR_K_OUT_OF_RANGE: + rte_bbdev_log(ERR, "Block_size_k is out of range (k<40 or k>6144)"); + break; + case DESC_ERR_K_NOT_NORMAL: + rte_bbdev_log(ERR, "Block_size_k is not a normal value within normal range"); + break; + case DESC_ERR_KPAI_NOT_NORMAL: + rte_bbdev_log(ERR, "Three_kpai is not a normal value for UL only"); + break; + case DESC_ERR_DESC_OFFSET_ERR: + rte_bbdev_log(ERR, "Queue offset does not meet the expectation in the FPGA"); + break; + case (DESC_ERR_K_OUT_OF_RANGE | DESC_ERR_DESC_OFFSET_ERR): + rte_bbdev_log(ERR, "Block_size_k is out of range (k<40 or k>6144) and queue offset error"); + break; + case (DESC_ERR_K_NOT_NORMAL | DESC_ERR_DESC_OFFSET_ERR): + rte_bbdev_log(ERR, "Block_size_k is not a normal value within normal range and queue offset error"); + break; + case (DESC_ERR_KPAI_NOT_NORMAL | DESC_ERR_DESC_OFFSET_ERR): + rte_bbdev_log(ERR, "Three_kpai is not a normal value for UL only and queue offset error"); + break; + case DESC_ERR_DESC_READ_FAIL: + rte_bbdev_log(ERR, "Unsuccessful completion for descriptor read"); + break; + case DESC_ERR_DESC_READ_TIMEOUT: + rte_bbdev_log(ERR, "Descriptor read time-out"); + break; + case DESC_ERR_DESC_READ_TLP_POISONED: + rte_bbdev_log(ERR, "Descriptor read TLP poisoned"); + break; + case DESC_ERR_CB_READ_FAIL: + rte_bbdev_log(ERR, "Unsuccessful completion for code block"); + break; + case DESC_ERR_CB_READ_TIMEOUT: + rte_bbdev_log(ERR, "Code block read time-out"); + break; + case DESC_ERR_CB_READ_TLP_POISONED: + rte_bbdev_log(ERR, "Code block read TLP poisoned"); + break; + default: + rte_bbdev_log(ERR, "Descriptor error unknown error code %u", + error_code); + break; + } + return 1; +} + +/** + * Set DMA descriptor for encode operation (1 Code Block) + * + * @param op + * Pointer to a single encode operation. 
+ * @param desc + * Pointer to DMA descriptor. + * @param input + * Pointer to pointer to input data which will be decoded. + * @param k + * K value (length of input in bits). + * @param e + * E value (length of output in bits). + * @param ncb + * Ncb value (size of the soft buffer). + * @param out_length + * Length of output buffer + * @param in_offset + * Input offset in rte_mbuf structure. It is used for calculating the point + * where data is starting. + * @param out_offset + * Output offset in rte_mbuf structure. It is used for calculating the point + * where hard output data will be stored. + * @param cbs_in_op + * Number of CBs contained in one operation. + */ +static inline int +fpga_dma_desc_te_fill(struct rte_bbdev_enc_op *op, + struct fpga_dma_enc_desc *desc, struct rte_mbuf *input, + struct rte_mbuf *output, uint16_t k, uint16_t e, uint16_t ncb, + uint32_t in_offset, uint32_t out_offset, uint16_t desc_offset, + uint8_t cbs_in_op) + +{ + /* reset */ + desc->done = 0; + desc->crc_en = check_bit(op->turbo_enc.op_flags, + RTE_BBDEV_TURBO_CRC_24B_ATTACH); + desc->bypass_rm = !check_bit(op->turbo_enc.op_flags, + RTE_BBDEV_TURBO_RATE_MATCH); + desc->k = k; + desc->e = e; + desc->ncb = ncb; + desc->rv = op->turbo_enc.rv_index; + desc->offset = desc_offset; + /* Set inbound data buffer address */ + desc->in_addr_hi = (uint32_t)( + rte_pktmbuf_mtophys_offset(input, in_offset) >> 32); + desc->in_addr_lw = (uint32_t)( + rte_pktmbuf_mtophys_offset(input, in_offset)); + + desc->out_addr_hi = (uint32_t)( + rte_pktmbuf_mtophys_offset(output, out_offset) >> 32); + desc->out_addr_lw = (uint32_t)( + rte_pktmbuf_mtophys_offset(output, out_offset)); + + /* Save software context needed for dequeue */ + desc->op_addr = op; + + /* Set total number of CBs in an op */ + desc->cbs_in_op = cbs_in_op; + + return 0; +} + +/** + * Set DMA descriptor for encode operation (1 Code Block) + * + * @param op + * Pointer to a single encode operation. 
+ * @param desc + * Pointer to DMA descriptor. + * @param input + * Pointer to pointer to input data which will be decoded. + * @param in_length + * Length of an input. + * @param k + * K value (length of an output in bits). + * @param in_offset + * Input offset in rte_mbuf structure. It is used for calculating the point + * where data is starting. + * @param out_offset + * Output offset in rte_mbuf structure. It is used for calculating the point + * where hard output data will be stored. + * @param cbs_in_op + * Number of CBs contained in one operation. + */ +static inline int +fpga_dma_desc_td_fill(struct rte_bbdev_dec_op *op, + struct fpga_dma_dec_desc *desc, struct rte_mbuf *input, + struct rte_mbuf *output, uint16_t in_length, uint16_t k, + uint32_t in_offset, uint32_t out_offset, uint16_t desc_offset, + uint8_t cbs_in_op) +{ + /* reset */ + desc->done = 0; + /* Set inbound data buffer address */ + desc->in_addr_hi = (uint32_t)( + rte_pktmbuf_mtophys_offset(input, in_offset) >> 32); + desc->in_addr_lw = (uint32_t)( + rte_pktmbuf_mtophys_offset(input, in_offset)); + desc->in_len = in_length; + desc->k = k; + desc->crc_type = !check_bit(op->turbo_dec.op_flags, + RTE_BBDEV_TURBO_CRC_TYPE_24B); + if ((op->turbo_dec.code_block_mode == 0) + && !check_bit(op->turbo_dec.op_flags, + RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP)) + desc->drop_crc = 1; + desc->max_iter = op->turbo_dec.iter_max * 2; + desc->offset = desc_offset; + desc->out_addr_hi = (uint32_t)( + rte_pktmbuf_mtophys_offset(output, out_offset) >> 32); + desc->out_addr_lw = (uint32_t)( + rte_pktmbuf_mtophys_offset(output, out_offset)); + + /* Save software context needed for dequeue */ + desc->op_addr = op; + + /* Set total number of CBs in an op */ + desc->cbs_in_op = cbs_in_op; + + return 0; +} + +#ifdef RTE_LIBRTE_BBDEV_DEBUG +/* Validates turbo encoder parameters */ +static int +validate_enc_op(struct rte_bbdev_enc_op *op) +{ + struct rte_bbdev_op_turbo_enc *turbo_enc = &op->turbo_enc; + struct 
rte_bbdev_op_enc_turbo_cb_params *cb = NULL; + struct rte_bbdev_op_enc_turbo_tb_params *tb = NULL; + uint16_t kw, kw_neg, kw_pos; + + if (turbo_enc->input.length > + RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { + rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", + turbo_enc->input.length, + RTE_BBDEV_TURBO_MAX_TB_SIZE); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return -1; + } + + if (op->mempool == NULL) { + rte_bbdev_log(ERR, "Invalid mempool pointer"); + return -1; + } + if (turbo_enc->input.data == NULL) { + rte_bbdev_log(ERR, "Invalid input pointer"); + return -1; + } + if (turbo_enc->output.data == NULL) { + rte_bbdev_log(ERR, "Invalid output pointer"); + return -1; + } + if (turbo_enc->rv_index > 3) { + rte_bbdev_log(ERR, + "rv_index (%u) is out of range 0 <= value <= 3", + turbo_enc->rv_index); + return -1; + } + if (turbo_enc->code_block_mode != 0 && + turbo_enc->code_block_mode != 1) { + rte_bbdev_log(ERR, + "code_block_mode (%u) is out of range 0 <= value <= 1", + turbo_enc->code_block_mode); + return -1; + } + + if (turbo_enc->code_block_mode == 0) { + tb = &turbo_enc->tb_params; + if ((tb->k_neg < RTE_BBDEV_TURBO_MIN_CB_SIZE + || tb->k_neg > RTE_BBDEV_TURBO_MAX_CB_SIZE) + && tb->c_neg > 0) { + rte_bbdev_log(ERR, + "k_neg (%u) is out of range %u <= value <= %u", + tb->k_neg, RTE_BBDEV_TURBO_MIN_CB_SIZE, + RTE_BBDEV_TURBO_MAX_CB_SIZE); + return -1; + } + if (tb->k_pos < RTE_BBDEV_TURBO_MIN_CB_SIZE + || tb->k_pos > RTE_BBDEV_TURBO_MAX_CB_SIZE) { + rte_bbdev_log(ERR, + "k_pos (%u) is out of range %u <= value <= %u", + tb->k_pos, RTE_BBDEV_TURBO_MIN_CB_SIZE, + RTE_BBDEV_TURBO_MAX_CB_SIZE); + return -1; + } + if (tb->c_neg > (RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1)) + rte_bbdev_log(ERR, + "c_neg (%u) is out of range 0 <= value <= %u", + tb->c_neg, + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1); + if (tb->c < 1 || tb->c > RTE_BBDEV_TURBO_MAX_CODE_BLOCKS) { + rte_bbdev_log(ERR, + "c (%u) is out of range 1 <= value <= %u", + tb->c, RTE_BBDEV_TURBO_MAX_CODE_BLOCKS); + return 
-1; + } + if (tb->cab > tb->c) { + rte_bbdev_log(ERR, + "cab (%u) is greater than c (%u)", + tb->cab, tb->c); + return -1; + } + if ((tb->ea < RTE_BBDEV_TURBO_MIN_CB_SIZE || (tb->ea % 2)) + && tb->r < tb->cab) { + rte_bbdev_log(ERR, + "ea (%u) is less than %u or it is not even", + tb->ea, RTE_BBDEV_TURBO_MIN_CB_SIZE); + return -1; + } + if ((tb->eb < RTE_BBDEV_TURBO_MIN_CB_SIZE || (tb->eb % 2)) + && tb->c > tb->cab) { + rte_bbdev_log(ERR, + "eb (%u) is less than %u or it is not even", + tb->eb, RTE_BBDEV_TURBO_MIN_CB_SIZE); + return -1; + } + + kw_neg = 3 * RTE_ALIGN_CEIL(tb->k_neg + 4, + RTE_BBDEV_TURBO_C_SUBBLOCK); + if (tb->ncb_neg < tb->k_neg || tb->ncb_neg > kw_neg) { + rte_bbdev_log(ERR, + "ncb_neg (%u) is out of range (%u) k_neg <= value <= (%u) kw_neg", + tb->ncb_neg, tb->k_neg, kw_neg); + return -1; + } + + kw_pos = 3 * RTE_ALIGN_CEIL(tb->k_pos + 4, + RTE_BBDEV_TURBO_C_SUBBLOCK); + if (tb->ncb_pos < tb->k_pos || tb->ncb_pos > kw_pos) { + rte_bbdev_log(ERR, + "ncb_pos (%u) is out of range (%u) k_pos <= value <= (%u) kw_pos", + tb->ncb_pos, tb->k_pos, kw_pos); + return -1; + } + if (tb->r > (tb->c - 1)) { + rte_bbdev_log(ERR, + "r (%u) is greater than c - 1 (%u)", + tb->r, tb->c - 1); + return -1; + } + } else { + cb = &turbo_enc->cb_params; + if (cb->k < RTE_BBDEV_TURBO_MIN_CB_SIZE + || cb->k > RTE_BBDEV_TURBO_MAX_CB_SIZE) { + rte_bbdev_log(ERR, + "k (%u) is out of range %u <= value <= %u", + cb->k, RTE_BBDEV_TURBO_MIN_CB_SIZE, + RTE_BBDEV_TURBO_MAX_CB_SIZE); + return -1; + } + + if (cb->e < RTE_BBDEV_TURBO_MIN_CB_SIZE || (cb->e % 2)) { + rte_bbdev_log(ERR, + "e (%u) is less than %u or it is not even", + cb->e, RTE_BBDEV_TURBO_MIN_CB_SIZE); + return -1; + } + + kw = RTE_ALIGN_CEIL(cb->k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3; + if (cb->ncb < cb->k || cb->ncb > kw) { + rte_bbdev_log(ERR, + "ncb (%u) is out of range (%u) k <= value <= (%u) kw", + cb->ncb, cb->k, kw); + return -1; + } + } + + return 0; +} +#endif + +static inline char * +mbuf_append(struct 
rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len) +{ + if (unlikely(len > rte_pktmbuf_tailroom(m))) + return NULL; + + char *tail = (char *)m->buf_addr + m->data_off + m->data_len; + m->data_len = (uint16_t)(m->data_len + len); + m_head->pkt_len = (m_head->pkt_len + len); + return tail; +} + +static inline int +enqueue_enc_one_op_cb(struct fpga_queue *q, struct rte_bbdev_enc_op *op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + struct rte_mbuf *input; + struct rte_mbuf *output; + int ret; + uint16_t k, e, ncb, ring_offset; + uint32_t total_left, in_length, out_length, in_offset, out_offset; + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + /* Validate op structure */ + if (validate_enc_op(op) == -1) { + rte_bbdev_log(ERR, "Turbo encoder validation failed"); + return -EINVAL; + } +#endif + + input = op->turbo_enc.input.data; + output = op->turbo_enc.output.data; + in_offset = op->turbo_enc.input.offset; + out_offset = op->turbo_enc.output.offset; + total_left = op->turbo_enc.input.length; + k = op->turbo_enc.cb_params.k; + e = op->turbo_enc.cb_params.e; + ncb = op->turbo_enc.cb_params.ncb; + + if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH)) + in_length = ((k - 24) >> 3); + else + in_length = k >> 3; + + if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_RATE_MATCH)) + out_length = (e + 7) >> 3; + else + out_length = (k >> 3) * 3 + 2; + + mbuf_append(output, output, out_length); + + /* Offset into the ring */ + ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); + /* Setup DMA Descriptor */ + desc = q->ring_addr + ring_offset; + + ret = fpga_dma_desc_te_fill(op, &desc->enc_req, input, output, k, e, + ncb, in_offset, out_offset, ring_offset, 1); + if (unlikely(ret < 0)) + return ret; + + /* Update lengths */ + total_left -= in_length; + op->turbo_enc.output.length += out_length; + + if (total_left > 0) { + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included CB sizes: mbuf len %u, cb len %u", + total_left, in_length); + 
return -1; + } + + return 1; +} + +static inline int +enqueue_enc_one_op_tb(struct fpga_queue *q, struct rte_bbdev_enc_op *op, + uint16_t desc_offset, uint8_t cbs_in_op) +{ + union fpga_dma_desc *desc; + struct rte_mbuf *input, *output_head, *output; + int ret; + uint8_t r, c, crc24_bits = 0; + uint16_t k, e, ncb, ring_offset; + uint32_t mbuf_total_left, in_length, out_length, in_offset, out_offset; + uint32_t seg_total_left; + uint16_t current_enqueued_cbs = 0; + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + /* Validate op structure */ + if (validate_enc_op(op) == -1) { + rte_bbdev_log(ERR, "Turbo encoder validation failed"); + return -EINVAL; + } +#endif + + input = op->turbo_enc.input.data; + output_head = output = op->turbo_enc.output.data; + in_offset = op->turbo_enc.input.offset; + out_offset = op->turbo_enc.output.offset; + mbuf_total_left = op->turbo_enc.input.length; + + c = op->turbo_enc.tb_params.c; + r = op->turbo_enc.tb_params.r; + + if (check_bit(op->turbo_enc.op_flags, RTE_BBDEV_TURBO_CRC_24B_ATTACH)) + crc24_bits = 24; + + while (mbuf_total_left > 0 && r < c && input != NULL) { + seg_total_left = rte_pktmbuf_data_len(input) - in_offset; + + e = (r < op->turbo_enc.tb_params.cab) ? + op->turbo_enc.tb_params.ea : + op->turbo_enc.tb_params.eb; + k = (r < op->turbo_enc.tb_params.c_neg) ? + op->turbo_enc.tb_params.k_neg : + op->turbo_enc.tb_params.k_pos; + ncb = (r < op->turbo_enc.tb_params.c_neg) ? 
+ op->turbo_enc.tb_params.ncb_neg : + op->turbo_enc.tb_params.ncb_pos; + + in_length = ((k - crc24_bits) >> 3); + + if (check_bit(op->turbo_enc.op_flags, + RTE_BBDEV_TURBO_RATE_MATCH)) + out_length = (e + 7) >> 3; + else + out_length = (k >> 3) * 3 + 2; + + mbuf_append(output_head, output, out_length); + + /* Setup DMA Descriptor */ + ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); + desc = q->ring_addr + ring_offset; + ret = fpga_dma_desc_te_fill(op, &desc->enc_req, input, output, + k, e, ncb, in_offset, out_offset, ring_offset, + cbs_in_op); + if (unlikely(ret < 0)) + return ret; + + rte_bbdev_log_debug("DMA request desc %p", desc); + + /* Update lengths */ + op->turbo_enc.output.length += out_length; + mbuf_total_left -= in_length; + + /* Update offsets */ + if (seg_total_left == in_length) { + /* Go to the next mbuf */ + input = input->next; + output = output->next; + in_offset = 0; + out_offset = 0; + } else { + in_offset += in_length; + out_offset += out_length; + } + + r++; + desc_offset++; + current_enqueued_cbs++; + } + + if (mbuf_total_left > 0) { + rte_bbdev_log(ERR, + "Some date still left for processing: mbuf_total_left = %u", + mbuf_total_left); + return -1; + } + + return current_enqueued_cbs; +} + +#ifdef RTE_LIBRTE_BBDEV_DEBUG +/* Validates turbo decoder parameters */ +static int +validate_dec_op(struct rte_bbdev_dec_op *op) +{ + struct rte_bbdev_op_turbo_dec *turbo_dec = &op->turbo_dec; + struct rte_bbdev_op_dec_turbo_cb_params *cb = NULL; + struct rte_bbdev_op_dec_turbo_tb_params *tb = NULL; + + if (op->mempool == NULL) { + rte_bbdev_log(ERR, "Invalid mempool pointer"); + return -1; + } + if (turbo_dec->input.data == NULL) { + rte_bbdev_log(ERR, "Invalid input pointer"); + return -1; + } + if (turbo_dec->hard_output.data == NULL) { + rte_bbdev_log(ERR, "Invalid hard_output pointer"); + return -1; + } + if (turbo_dec->rv_index > 3) { + rte_bbdev_log(ERR, + "rv_index (%u) is out of range 0 <= value <= 3", + turbo_dec->rv_index); + 
return -1; + } + if (turbo_dec->iter_min < 1) { + rte_bbdev_log(ERR, + "iter_min (%u) is less than 1", + turbo_dec->iter_min); + return -1; + } + if (turbo_dec->iter_max <= 2) { + rte_bbdev_log(ERR, + "iter_max (%u) is less than or equal to 2", + turbo_dec->iter_max); + return -1; + } + if (turbo_dec->iter_min > turbo_dec->iter_max) { + rte_bbdev_log(ERR, + "iter_min (%u) is greater than iter_max (%u)", + turbo_dec->iter_min, turbo_dec->iter_max); + return -1; + } + if (turbo_dec->code_block_mode != 0 && + turbo_dec->code_block_mode != 1) { + rte_bbdev_log(ERR, + "code_block_mode (%u) is out of range 0 <= value <= 1", + turbo_dec->code_block_mode); + return -1; + } + + if (turbo_dec->code_block_mode == 0) { + + if ((turbo_dec->op_flags & + RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP) && + !(turbo_dec->op_flags & RTE_BBDEV_TURBO_CRC_TYPE_24B)) { + rte_bbdev_log(ERR, + "RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP should accompany RTE_BBDEV_TURBO_CRC_TYPE_24B"); + return -1; + } + + tb = &turbo_dec->tb_params; + if ((tb->k_neg < RTE_BBDEV_TURBO_MIN_CB_SIZE + || tb->k_neg > RTE_BBDEV_TURBO_MAX_CB_SIZE) + && tb->c_neg > 0) { + rte_bbdev_log(ERR, + "k_neg (%u) is out of range %u <= value <= %u", + tb->k_neg, RTE_BBDEV_TURBO_MIN_CB_SIZE, + RTE_BBDEV_TURBO_MAX_CB_SIZE); + return -1; + } + if ((tb->k_pos < RTE_BBDEV_TURBO_MIN_CB_SIZE + || tb->k_pos > RTE_BBDEV_TURBO_MAX_CB_SIZE) + && tb->c > tb->c_neg) { + rte_bbdev_log(ERR, + "k_pos (%u) is out of range %u <= value <= %u", + tb->k_pos, RTE_BBDEV_TURBO_MIN_CB_SIZE, + RTE_BBDEV_TURBO_MAX_CB_SIZE); + return -1; + } + if (tb->c_neg > (RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1)) + rte_bbdev_log(ERR, + "c_neg (%u) is out of range 0 <= value <= %u", + tb->c_neg, + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS - 1); + if (tb->c < 1 || tb->c > RTE_BBDEV_TURBO_MAX_CODE_BLOCKS) { + rte_bbdev_log(ERR, + "c (%u) is out of range 1 <= value <= %u", + tb->c, RTE_BBDEV_TURBO_MAX_CODE_BLOCKS); + return -1; + } + if (tb->cab > tb->c) { + rte_bbdev_log(ERR, + "cab (%u) is 
greater than c (%u)", + tb->cab, tb->c); + return -1; + } + } else { + + if (turbo_dec->op_flags & RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP) { + rte_bbdev_log(ERR, + "RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP is invalid in CB-mode"); + return -1; + } + + cb = &turbo_dec->cb_params; + if (cb->k < RTE_BBDEV_TURBO_MIN_CB_SIZE + || cb->k > RTE_BBDEV_TURBO_MAX_CB_SIZE) { + rte_bbdev_log(ERR, + "k (%u) is out of range %u <= value <= %u", + cb->k, RTE_BBDEV_TURBO_MIN_CB_SIZE, + RTE_BBDEV_TURBO_MAX_CB_SIZE); + return -1; + } + } + + return 0; +} +#endif + +static inline int +enqueue_dec_one_op_cb(struct fpga_queue *q, struct rte_bbdev_dec_op *op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + struct rte_mbuf *input; + struct rte_mbuf *output; + int ret; + uint16_t k, kw, ring_offset; + uint32_t total_left, in_length, out_length, in_offset, out_offset; + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + /* Validate op structure */ + if (validate_dec_op(op) == -1) { + rte_bbdev_log(ERR, "Turbo decoder validation failed"); + return -EINVAL; + } +#endif + + input = op->turbo_dec.input.data; + output = op->turbo_dec.hard_output.data; + total_left = op->turbo_dec.input.length; + in_offset = op->turbo_dec.input.offset; + out_offset = op->turbo_dec.hard_output.offset; + + k = op->turbo_dec.cb_params.k; + kw = RTE_ALIGN_CEIL(k + 4, 32) * 3; + in_length = kw; + out_length = k >> 3; + + mbuf_append(output, output, out_length); + + /* Setup DMA Descriptor */ + ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); + desc = q->ring_addr + ring_offset; + ret = fpga_dma_desc_td_fill(op, &desc->dec_req, input, output, + in_length, k, in_offset, out_offset, ring_offset, 1); + if (unlikely(ret < 0)) + return ret; + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + print_dma_dec_desc_debug_info(desc); +#endif + + /* Update lengths */ + total_left -= in_length; + op->turbo_dec.hard_output.length += out_length; + + if (total_left > 0) { + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included CB sizes: mbuf 
len %u, cb len %u", + total_left, in_length); + return -1; + } + + return 1; +} + + +static inline int +enqueue_dec_one_op_tb(struct fpga_queue *q, struct rte_bbdev_dec_op *op, + uint16_t desc_offset, uint8_t cbs_in_op) +{ + union fpga_dma_desc *desc; + struct rte_mbuf *input, *output_head, *output; + int ret; + uint8_t r, c; + uint16_t k, kw, in_length, out_length, ring_offset; + uint32_t mbuf_total_left, seg_total_left, in_offset, out_offset; + uint16_t current_enqueued_cbs = 0; + uint16_t crc24_overlap = 0; + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + /* Validate op structure */ + if (validate_dec_op(op) == -1) { + rte_bbdev_log(ERR, "Turbo decoder validation failed"); + return -EINVAL; + } +#endif + + input = op->turbo_dec.input.data; + output_head = output = op->turbo_dec.hard_output.data; + mbuf_total_left = op->turbo_dec.input.length; + in_offset = op->turbo_dec.input.offset; + out_offset = op->turbo_dec.hard_output.offset; + + if (!check_bit(op->turbo_dec.op_flags, + RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP)) + crc24_overlap = 24; + + c = op->turbo_dec.tb_params.c; + r = op->turbo_dec.tb_params.r; + + while (mbuf_total_left > 0 && r < c && input != NULL) { + seg_total_left = rte_pktmbuf_data_len(input) - in_offset; + k = (r < op->turbo_dec.tb_params.c_neg) ? + op->turbo_dec.tb_params.k_neg : + op->turbo_dec.tb_params.k_pos; + kw = RTE_ALIGN_CEIL(k + 4, 32) * 3; + + in_length = kw; + out_length = (k - crc24_overlap) >> 3; + + mbuf_append(output_head, output, out_length); + + if (seg_total_left < in_length) { + rte_bbdev_log(ERR, + "Partial CB found in a TB. 
FPGA Driver doesn't support scatter-gather operations!"); + return -1; + } + + /* Setup DMA Descriptor */ + ring_offset = ((q->tail + desc_offset) & q->sw_ring_wrap_mask); + desc = q->ring_addr + ring_offset; + ret = fpga_dma_desc_td_fill(op, &desc->dec_req, input, output, + in_length, k, in_offset, out_offset, + ring_offset, cbs_in_op); + if (unlikely(ret < 0)) + return ret; + + /* Update lengths */ + ret = rte_pktmbuf_trim(op->turbo_dec.hard_output.data, + (crc24_overlap >> 3)); +#ifdef RTE_LIBRTE_BBDEV_DEBUG + if (ret < 0) { + rte_bbdev_log(ERR, + "The length to remove is greater than the length of the last segment"); + return -EINVAL; + } +#endif + op->turbo_dec.hard_output.length += out_length; + mbuf_total_left -= in_length; + + /* Update offsets */ + if (seg_total_left == in_length) { + /* Go to the next mbuf */ + input = input->next; + output = output->next; + in_offset = 0; + out_offset = 0; + } else { + in_offset += in_length; + out_offset += out_length; + } + + r++; + desc_offset++; + current_enqueued_cbs++; + } + + if (mbuf_total_left > 0) { + rte_bbdev_log(ERR, + "Some date still left for processing: mbuf_total_left = %u", + mbuf_total_left); + return -1; + } + + return current_enqueued_cbs; +} + +static uint16_t +fpga_enqueue_enc(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t num) +{ + uint8_t cbs_in_op; + uint16_t i, total_enqueued_cbs = 0; + int32_t avail; + int enqueued_cbs; + struct fpga_queue *q = q_data->queue_private; + union fpga_dma_desc *desc; + + /* Check if queue is not full */ + if (unlikely(((q->tail + 1) & q->sw_ring_wrap_mask) == + q->head_free_desc)) + return 0; + + /* Calculates available space */ + avail = (q->head_free_desc > q->tail) ? 
+ q->head_free_desc - q->tail - 1 : + q->ring_ctrl_reg.ring_size + q->head_free_desc - q->tail - 1; + + for (i = 0; i < num; ++i) { + if (ops[i]->turbo_enc.code_block_mode == 0) { + cbs_in_op = get_num_cbs_in_op_enc(&ops[i]->turbo_enc); + /* Check if there is available space for further + * processing + */ + if (unlikely(avail - cbs_in_op < 0)) + break; + avail -= cbs_in_op; + enqueued_cbs = enqueue_enc_one_op_tb(q, ops[i], + total_enqueued_cbs, cbs_in_op); + } else { + /* Check if there is available space for further + * processing + */ + if (unlikely(avail - 1 < 0)) + break; + avail -= 1; + enqueued_cbs = enqueue_enc_one_op_cb(q, ops[i], + total_enqueued_cbs); + } + + if (enqueued_cbs < 0) + break; + + total_enqueued_cbs += enqueued_cbs; + + rte_bbdev_log_debug("enqueuing enc ops [%d/%d] | head %d | tail %d", + total_enqueued_cbs, num, + q->head_free_desc, q->tail); + } + + /* Set interrupt bit for last CB in enqueued ops. FPGA issues interrupt + * only when all previous CBs were already processed. + */ + desc = q->ring_addr + ((q->tail + total_enqueued_cbs - 1) + & q->sw_ring_wrap_mask); + desc->enc_req.irq_en = q->irq_enable; + + fpga_dma_enqueue(q, total_enqueued_cbs, &q_data->queue_stats); + + /* Update stats */ + q_data->queue_stats.enqueued_count += i; + q_data->queue_stats.enqueue_err_count += num - i; + + return i; +} + +static uint16_t +fpga_enqueue_dec(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t num) +{ + uint8_t cbs_in_op; + uint16_t i, total_enqueued_cbs = 0; + int32_t avail; + int enqueued_cbs; + struct fpga_queue *q = q_data->queue_private; + union fpga_dma_desc *desc; + + /* Check if queue is not full */ + if (unlikely(((q->tail + 1) & q->sw_ring_wrap_mask) == + q->head_free_desc)) + return 0; + + /* Calculates available space */ + avail = (q->head_free_desc > q->tail) ? 
+ q->head_free_desc - q->tail - 1 : + q->ring_ctrl_reg.ring_size + q->head_free_desc - q->tail - 1; + + for (i = 0; i < num; ++i) { + if (ops[i]->turbo_dec.code_block_mode == 0) { + cbs_in_op = get_num_cbs_in_op_dec(&ops[i]->turbo_dec); + /* Check if there is available space for further + * processing + */ + if (unlikely(avail - cbs_in_op < 0)) + break; + avail -= cbs_in_op; + enqueued_cbs = enqueue_dec_one_op_tb(q, ops[i], + total_enqueued_cbs, cbs_in_op); + } else { + /* Check if there is available space for further + * processing + */ + if (unlikely(avail - 1 < 0)) + break; + avail -= 1; + enqueued_cbs = enqueue_dec_one_op_cb(q, ops[i], + total_enqueued_cbs); + } + + if (enqueued_cbs < 0) + break; + + total_enqueued_cbs += enqueued_cbs; + + rte_bbdev_log_debug("enqueuing dec ops [%d/%d] | head %d | tail %d", + total_enqueued_cbs, num, + q->head_free_desc, q->tail); + } + + /* Set interrupt bit for last CB in enqueued ops. FPGA issues interrupt + * only when all previous CBs were already processed. 
+ */ + desc = q->ring_addr + ((q->tail + total_enqueued_cbs - 1) + & q->sw_ring_wrap_mask); + desc->dec_req.irq_en = q->irq_enable; + + fpga_dma_enqueue(q, total_enqueued_cbs, &q_data->queue_stats); + + /* Update stats */ + q_data->queue_stats.enqueued_count += i; + q_data->queue_stats.enqueue_err_count += num - i; + + return i; +} + +static inline int +dequeue_enc_one_op_cb(struct fpga_queue *q, struct rte_bbdev_enc_op **op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + int desc_error = 0; + + /* Set current desc */ + desc = q->ring_addr + ((q->head_free_desc + desc_offset) + & q->sw_ring_wrap_mask); + + /*check if done */ + if (desc->enc_req.done == 0) + return -1; + + /* make sure the response is read atomically */ + rte_smp_rmb(); + + rte_bbdev_log_debug("DMA response desc %p", desc); + + *op = desc->enc_req.op_addr; + /* Check the decriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + (*op)->status = desc_error << RTE_BBDEV_DATA_ERROR; + + return 1; +} + +static inline int +dequeue_enc_one_op_tb(struct fpga_queue *q, struct rte_bbdev_enc_op **op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + uint8_t cbs_in_op, cb_idx; + int desc_error = 0; + int status = 0; + + /* Set descriptor */ + desc = q->ring_addr + ((q->head_free_desc + desc_offset) + & q->sw_ring_wrap_mask); + + /* Verify if done bit is set */ + if (desc->enc_req.done == 0) + return -1; + + /* Make sure the response is read atomically */ + rte_smp_rmb(); + + /* Verify if done bit in all CBs is set */ + cbs_in_op = desc->enc_req.cbs_in_op; + for (cb_idx = 1; cb_idx < cbs_in_op; ++cb_idx) { + desc = q->ring_addr + ((q->head_free_desc + desc_offset + + cb_idx) & q->sw_ring_wrap_mask); + if (desc->enc_req.done == 0) + return -1; + } + + /* Make sure the response is read atomically */ + rte_smp_rmb(); + + for (cb_idx = 0; cb_idx < cbs_in_op; ++cb_idx) { + desc = q->ring_addr + ((q->head_free_desc + desc_offset + + cb_idx) & 
q->sw_ring_wrap_mask); + /* Check the decriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + status |= desc_error << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log_debug("DMA response desc %p", desc); + } + + *op = desc->enc_req.op_addr; + (*op)->status = status; + return cbs_in_op; +} + +static inline int +dequeue_dec_one_op_cb(struct fpga_queue *q, struct rte_bbdev_dec_op **op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + int desc_error = 0; + /* Set descriptor */ + desc = q->ring_addr + ((q->head_free_desc + desc_offset) + & q->sw_ring_wrap_mask); + + /* Verify done bit is set */ + if (desc->dec_req.done == 0) + return -1; + + /* make sure the response is read atomically */ + rte_smp_rmb(); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + print_dma_dec_desc_debug_info(desc); + +#endif + + *op = desc->dec_req.op_addr; + /* FPGA reports in half-iterations, from 0 to 31. get ceiling */ + (*op)->turbo_dec.iter_count = (desc->dec_req.iter + 2) >> 1; + /* crc_pass = 0 when decoder fails */ + (*op)->status = !(desc->dec_req.crc_pass) << RTE_BBDEV_CRC_ERROR; + /* Check the decriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + (*op)->status |= desc_error << RTE_BBDEV_DATA_ERROR; + return 1; +} + +static inline int +dequeue_dec_one_op_tb(struct fpga_queue *q, struct rte_bbdev_dec_op **op, + uint16_t desc_offset) +{ + union fpga_dma_desc *desc; + uint8_t cbs_in_op, cb_idx, iter_count = 0; + int status = 0; + int desc_error = 0; + /* Set descriptor */ + desc = q->ring_addr + ((q->head_free_desc + desc_offset) + & q->sw_ring_wrap_mask); + + /* Verify if done bit is set */ + if (desc->dec_req.done == 0) + return -1; + + /* Make sure the response is read atomically */ + rte_smp_rmb(); + + /* Verify if done bit in all CBs is set */ + cbs_in_op = desc->dec_req.cbs_in_op; + for (cb_idx = 1; cb_idx < cbs_in_op; ++cb_idx) { + desc = q->ring_addr + ((q->head_free_desc + desc_offset + + cb_idx) & 
q->sw_ring_wrap_mask); + if (desc->dec_req.done == 0) + return -1; + } + + /* Make sure the response is read atomically */ + rte_smp_rmb(); + + for (cb_idx = 0; cb_idx < cbs_in_op; ++cb_idx) { + desc = q->ring_addr + ((q->head_free_desc + desc_offset + + cb_idx) & q->sw_ring_wrap_mask); + /* get max iter_count for all CBs in op */ + iter_count = RTE_MAX(iter_count, (uint8_t) desc->dec_req.iter); + /* crc_pass = 0 when decoder fails, one fails all */ + status |= !(desc->dec_req.crc_pass) << RTE_BBDEV_CRC_ERROR; + /* Check the decriptor error field, return 1 on error */ + desc_error = check_desc_error(desc->enc_req.error); + status |= desc_error << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log_debug("DMA response desc %p", desc); + } + + *op = desc->dec_req.op_addr; + + /* FPGA reports in half-iterations, get ceiling */ + (*op)->turbo_dec.iter_count = (iter_count + 2) >> 1; + (*op)->status = status; + return cbs_in_op; +} + +static uint16_t +fpga_dequeue_enc(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t num) +{ + struct fpga_queue *q = q_data->queue_private; + uint32_t avail = (q->tail - q->head_free_desc) & q->sw_ring_wrap_mask; + uint16_t i; + uint16_t dequeued_cbs = 0; + struct rte_bbdev_enc_op *op; + int ret; + + for (i = 0; (i < num) && (dequeued_cbs < avail); ++i) { + op = (q->ring_addr + ((q->head_free_desc + dequeued_cbs) + & q->sw_ring_wrap_mask))->enc_req.op_addr; + if (op->turbo_enc.code_block_mode == 0) + ret = dequeue_enc_one_op_tb(q, &ops[i], dequeued_cbs); + else + ret = dequeue_enc_one_op_cb(q, &ops[i], dequeued_cbs); + + if (ret < 0) + break; + + dequeued_cbs += ret; + + rte_bbdev_log_debug("dequeuing enc ops [%d/%d] | head %d | tail %d", + dequeued_cbs, num, q->head_free_desc, q->tail); + } + + /* Update head */ + q->head_free_desc = (q->head_free_desc + dequeued_cbs) & + q->sw_ring_wrap_mask; + + /* Update stats */ + q_data->queue_stats.dequeued_count += i; + + return i; +} + +static uint16_t +fpga_dequeue_dec(struct 
rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t num) +{ + struct fpga_queue *q = q_data->queue_private; + uint32_t avail = (q->tail - q->head_free_desc) & q->sw_ring_wrap_mask; + uint16_t i; + uint16_t dequeued_cbs = 0; + struct rte_bbdev_dec_op *op; + int ret; + + for (i = 0; (i < num) && (dequeued_cbs < avail); ++i) { + op = (q->ring_addr + ((q->head_free_desc + dequeued_cbs) + & q->sw_ring_wrap_mask))->dec_req.op_addr; + if (op->turbo_dec.code_block_mode == 0) + ret = dequeue_dec_one_op_tb(q, &ops[i], dequeued_cbs); + else + ret = dequeue_dec_one_op_cb(q, &ops[i], dequeued_cbs); + + if (ret < 0) + break; + + dequeued_cbs += ret; + + rte_bbdev_log_debug("dequeuing dec ops [%d/%d] | head %d | tail %d", + dequeued_cbs, num, q->head_free_desc, q->tail); + } + + /* Update head */ + q->head_free_desc = (q->head_free_desc + dequeued_cbs) & + q->sw_ring_wrap_mask; + + /* Update stats */ + q_data->queue_stats.dequeued_count += i; + + return i; +} + +/* Initialization Function */ +static void +fpga_lte_fec_init(struct rte_bbdev *dev, struct rte_pci_driver *drv) +{ + struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev->device); + + dev->dev_ops = &fpga_ops; + dev->enqueue_enc_ops = fpga_enqueue_enc; + dev->enqueue_dec_ops = fpga_enqueue_dec; + dev->dequeue_enc_ops = fpga_dequeue_enc; + dev->dequeue_dec_ops = fpga_dequeue_dec; + + ((struct fpga_lte_fec_device *) dev->data->dev_private)->pf_device = + !strcmp(drv->driver.name, + RTE_STR(FPGA_LTE_FEC_PF_DRIVER_NAME)); + ((struct fpga_lte_fec_device *) dev->data->dev_private)->mmio_base = + pci_dev->mem_resource[0].addr; + + rte_bbdev_log_debug( + "Init device %s [%s] @ virtaddr %p phyaddr %#"PRIx64, + dev->device->driver->name, dev->data->name, + (void *)pci_dev->mem_resource[0].addr, + pci_dev->mem_resource[0].phys_addr); +} + +static int +fpga_lte_fec_probe(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) +{ + struct rte_bbdev *bbdev = NULL; + char dev_name[RTE_BBDEV_NAME_MAX_LEN]; + 
+ if (pci_dev == NULL) { + rte_bbdev_log(ERR, "NULL PCI device"); + return -EINVAL; + } + + rte_pci_device_name(&pci_dev->addr, dev_name, sizeof(dev_name)); + + /* Allocate memory to be used privately by drivers */ + bbdev = rte_bbdev_allocate(pci_dev->device.name); + if (bbdev == NULL) + return -ENODEV; + + /* allocate device private memory */ + bbdev->data->dev_private = rte_zmalloc_socket(dev_name, + sizeof(struct fpga_lte_fec_device), RTE_CACHE_LINE_SIZE, + pci_dev->device.numa_node); + + if (bbdev->data->dev_private == NULL) { + rte_bbdev_log(CRIT, + "Allocate of %zu bytes for device \"%s\" failed", + sizeof(struct fpga_lte_fec_device), dev_name); + rte_bbdev_release(bbdev); + return -ENOMEM; + } + + /* Fill HW specific part of device structure */ + bbdev->device = &pci_dev->device; + bbdev->intr_handle = &pci_dev->intr_handle; + bbdev->data->socket_id = pci_dev->device.numa_node; + + /* Invoke FEC FPGA device initialization function */ + fpga_lte_fec_init(bbdev, pci_drv); + + rte_bbdev_log_debug("bbdev id = %u [%s]", + bbdev->data->dev_id, dev_name); + + struct fpga_lte_fec_device *d = bbdev->data->dev_private; + uint32_t version_id = fpga_reg_read_32(d->mmio_base, + FPGA_LTE_FEC_VERSION_ID); + rte_bbdev_log(INFO, "FEC FPGA RTL v%u.%u", + ((uint16_t)(version_id >> 16)), ((uint16_t)version_id)); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + if (!strcmp(bbdev->device->driver->name, + RTE_STR(FPGA_LTE_FEC_PF_DRIVER_NAME))) + print_static_reg_debug_info(d->mmio_base); +#endif + return 0; +} + +static int +fpga_lte_fec_remove(struct rte_pci_device *pci_dev) +{ + struct rte_bbdev *bbdev; + int ret; + uint8_t dev_id; + + if (pci_dev == NULL) + return -EINVAL; + + /* Find device */ + bbdev = rte_bbdev_get_named_dev(pci_dev->device.name); + if (bbdev == NULL) { + rte_bbdev_log(CRIT, + "Couldn't find HW dev \"%s\" to uninitialise it", + pci_dev->device.name); + return -ENODEV; + } + dev_id = bbdev->data->dev_id; + + /* free device private memory before close */ + 
rte_free(bbdev->data->dev_private); + + /* Close device */ + ret = rte_bbdev_close(dev_id); + if (ret < 0) + rte_bbdev_log(ERR, + "Device %i failed to close during uninit: %i", + dev_id, ret); + + /* release bbdev from library */ + ret = rte_bbdev_release(bbdev); + if (ret) + rte_bbdev_log(ERR, "Device %i failed to uninit: %i", dev_id, + ret); + + rte_bbdev_log_debug("Destroyed bbdev = %u", dev_id); + + return 0; +} + +static inline void +set_default_fpga_conf(struct fpga_lte_fec_conf *def_conf) +{ + /* clear default configuration before initialization */ + memset(def_conf, 0, sizeof(struct fpga_lte_fec_conf)); + /* Set pf mode to true */ + def_conf->pf_mode_en = true; + + /* Set ratio between UL and DL to 1:1 (unit of weight is 3 CBs) */ + def_conf->ul_bandwidth = 3; + def_conf->dl_bandwidth = 3; + + /* Set Load Balance Factor to 64 */ + def_conf->dl_load_balance = 64; + def_conf->ul_load_balance = 64; +} + +/* Initial configuration of FPGA LTE FEC device */ +int +fpga_lte_fec_configure(const char *dev_name, + const struct fpga_lte_fec_conf *conf) +{ + uint32_t payload_32, address; + uint16_t payload_16; + uint8_t payload_8; + uint16_t q_id, vf_id, total_q_id, total_ul_q_id, total_dl_q_id; + struct rte_bbdev *bbdev = rte_bbdev_get_named_dev(dev_name); + struct fpga_lte_fec_conf def_conf; + + if (bbdev == NULL) { + rte_bbdev_log(ERR, + "Invalid dev_name (%s), or device is not yet initialised", + dev_name); + return -ENODEV; + } + + struct fpga_lte_fec_device *d = bbdev->data->dev_private; + + if (conf == NULL) { + rte_bbdev_log(ERR, + "FPGA Configuration was not provided. Default configuration will be loaded."); + set_default_fpga_conf(&def_conf); + conf = &def_conf; + } + + /* + * Configure UL:DL ratio. 
+ * [7:0]: UL weight + * [15:8]: DL weight + */ + payload_16 = (conf->dl_bandwidth << 8) | conf->ul_bandwidth; + address = FPGA_LTE_FEC_CONFIGURATION; + fpga_reg_write_16(d->mmio_base, address, payload_16); + + /* Clear all queues registers */ + payload_32 = FPGA_INVALID_HW_QUEUE_ID; + for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { + address = (q_id << 2) + FPGA_LTE_FEC_QUEUE_MAP; + fpga_reg_write_32(d->mmio_base, address, payload_32); + } + + /* + * If PF mode is enabled allocate all queues for PF only. + * + * For VF mode each VF can have different number of UL and DL queues. + * Total number of queues to configure cannot exceed FPGA + * capabilities - 64 queues - 32 queues for UL and 32 queues for DL. + * Queues mapping is done according to configuration: + * + * UL queues: + * | Q_ID | VF_ID | + * | 0 | 0 | + * | ... | 0 | + * | conf->vf_dl_queues_number[0] - 1 | 0 | + * | conf->vf_dl_queues_number[0] | 1 | + * | ... | 1 | + * | conf->vf_dl_queues_number[1] - 1 | 1 | + * | ... | ... | + * | conf->vf_dl_queues_number[7] - 1 | 7 | + * + * DL queues: + * | Q_ID | VF_ID | + * | 32 | 0 | + * | ... | 0 | + * | conf->vf_ul_queues_number[0] - 1 | 0 | + * | conf->vf_ul_queues_number[0] | 1 | + * | ... | 1 | + * | conf->vf_ul_queues_number[1] - 1 | 1 | + * | ... | ... 
| + * | conf->vf_ul_queues_number[7] - 1 | 7 | + * + * Example of configuration: + * conf->vf_ul_queues_number[0] = 4; -> 4 UL queues for VF0 + * conf->vf_dl_queues_number[0] = 4; -> 4 DL queues for VF0 + * conf->vf_ul_queues_number[1] = 2; -> 2 UL queues for VF1 + * conf->vf_dl_queues_number[1] = 2; -> 2 DL queues for VF1 + * + * UL: + * | Q_ID | VF_ID | + * | 0 | 0 | + * | 1 | 0 | + * | 2 | 0 | + * | 3 | 0 | + * | 4 | 1 | + * | 5 | 1 | + * + * DL: + * | Q_ID | VF_ID | + * | 32 | 0 | + * | 33 | 0 | + * | 34 | 0 | + * | 35 | 0 | + * | 36 | 1 | + * | 37 | 1 | + */ + if (conf->pf_mode_en) { + payload_32 = 0x1; + for (q_id = 0; q_id < FPGA_TOTAL_NUM_QUEUES; ++q_id) { + address = (q_id << 2) + FPGA_LTE_FEC_QUEUE_MAP; + fpga_reg_write_32(d->mmio_base, address, payload_32); + } + } else { + /* Calculate total number of UL and DL queues to configure */ + total_ul_q_id = total_dl_q_id = 0; + for (vf_id = 0; vf_id < FPGA_LTE_FEC_NUM_VFS; ++vf_id) { + total_ul_q_id += conf->vf_ul_queues_number[vf_id]; + total_dl_q_id += conf->vf_dl_queues_number[vf_id]; + } + total_q_id = total_dl_q_id + total_ul_q_id; + /* + * Check if total number of queues to configure does not exceed + * FPGA capabilities (64 queues - 32 UL and 32 DL queues) + */ + if ((total_ul_q_id > FPGA_NUM_UL_QUEUES) || + (total_dl_q_id > FPGA_NUM_DL_QUEUES) || + (total_q_id > FPGA_TOTAL_NUM_QUEUES)) { + rte_bbdev_log(ERR, + "FPGA Configuration failed. 
Too many queues to configure: UL_Q %u, DL_Q %u, FPGA_Q %u", + total_ul_q_id, total_dl_q_id, + FPGA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + total_ul_q_id = 0; + for (vf_id = 0; vf_id < FPGA_LTE_FEC_NUM_VFS; ++vf_id) { + for (q_id = 0; q_id < conf->vf_ul_queues_number[vf_id]; + ++q_id, ++total_ul_q_id) { + address = (total_ul_q_id << 2) + + FPGA_LTE_FEC_QUEUE_MAP; + payload_32 = ((0x80 + vf_id) << 16) | 0x1; + fpga_reg_write_32(d->mmio_base, address, + payload_32); + } + } + total_dl_q_id = 0; + for (vf_id = 0; vf_id < FPGA_LTE_FEC_NUM_VFS; ++vf_id) { + for (q_id = 0; q_id < conf->vf_dl_queues_number[vf_id]; + ++q_id, ++total_dl_q_id) { + address = ((total_dl_q_id + FPGA_NUM_UL_QUEUES) + << 2) + FPGA_LTE_FEC_QUEUE_MAP; + payload_32 = ((0x80 + vf_id) << 16) | 0x1; + fpga_reg_write_32(d->mmio_base, address, + payload_32); + } + } + } + + /* Setting Load Balance Factor */ + payload_16 = (conf->dl_load_balance << 8) | (conf->ul_load_balance); + address = FPGA_LTE_FEC_LOAD_BALANCE_FACTOR; + fpga_reg_write_16(d->mmio_base, address, payload_16); + + /* Setting length of ring descriptor entry */ + payload_16 = FPGA_RING_DESC_ENTRY_LENGTH; + address = FPGA_LTE_FEC_RING_DESC_LEN; + fpga_reg_write_16(d->mmio_base, address, payload_16); + + /* Setting FLR timeout value */ + payload_16 = conf->flr_time_out; + address = FPGA_LTE_FEC_FLR_TIME_OUT; + fpga_reg_write_16(d->mmio_base, address, payload_16); + + /* Queue PF/VF mapping table is ready */ + payload_8 = 0x1; + address = FPGA_LTE_FEC_QUEUE_PF_VF_MAP_DONE; + fpga_reg_write_8(d->mmio_base, address, payload_8); + + rte_bbdev_log_debug("PF FPGA LTE FEC configuration complete for %s", + dev_name); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + print_static_reg_debug_info(d->mmio_base); +#endif + return 0; +} + +/* FPGA LTE FEC PCI PF address map */ +static struct rte_pci_id pci_id_fpga_lte_fec_pf_map[] = { + { + RTE_PCI_DEVICE(FPGA_LTE_FEC_VENDOR_ID, + FPGA_LTE_FEC_PF_DEVICE_ID) + }, + {.device_id = 0}, +}; + +static struct rte_pci_driver 
fpga_lte_fec_pci_pf_driver = { + .probe = fpga_lte_fec_probe, + .remove = fpga_lte_fec_remove, + .id_table = pci_id_fpga_lte_fec_pf_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING +}; + +/* FPGA LTE FEC PCI VF address map */ +static struct rte_pci_id pci_id_fpga_lte_fec_vf_map[] = { + { + RTE_PCI_DEVICE(FPGA_LTE_FEC_VENDOR_ID, + FPGA_LTE_FEC_VF_DEVICE_ID) + }, + {.device_id = 0}, +}; + +static struct rte_pci_driver fpga_lte_fec_pci_vf_driver = { + .probe = fpga_lte_fec_probe, + .remove = fpga_lte_fec_remove, + .id_table = pci_id_fpga_lte_fec_vf_map, + .drv_flags = RTE_PCI_DRV_NEED_MAPPING +}; + + +RTE_PMD_REGISTER_PCI(FPGA_LTE_FEC_PF_DRIVER_NAME, fpga_lte_fec_pci_pf_driver); +RTE_PMD_REGISTER_PCI_TABLE(FPGA_LTE_FEC_PF_DRIVER_NAME, + pci_id_fpga_lte_fec_pf_map); +RTE_PMD_REGISTER_PCI(FPGA_LTE_FEC_VF_DRIVER_NAME, fpga_lte_fec_pci_vf_driver); +RTE_PMD_REGISTER_PCI_TABLE(FPGA_LTE_FEC_VF_DRIVER_NAME, + pci_id_fpga_lte_fec_vf_map); + +RTE_INIT(fpga_lte_fec_init_log) +{ + fpga_lte_fec_logtype = rte_log_register("pmd.bb.fpga_lte_fec"); + if (fpga_lte_fec_logtype >= 0) +#ifdef RTE_LIBRTE_BBDEV_DEBUG + rte_log_set_level(fpga_lte_fec_logtype, RTE_LOG_DEBUG); +#else + rte_log_set_level(fpga_lte_fec_logtype, RTE_LOG_NOTICE); +#endif +} diff --git a/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.h b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.h new file mode 100644 index 000000000..b2e423c87 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/fpga_lte_fec.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2019 Intel Corporation + */ + +#ifndef _FPGA_LTE_FEC_H_ +#define _FPGA_LTE_FEC_H_ + +#include <stdint.h> +#include <stdbool.h> + +/** + * @file fpga_lte_fec.h + * + * Interface for Intel(R) FGPA LTE FEC device configuration at the host level, + * directly accessible by the application. + * Configuration related to LTE Turbo coding functionality is done through + * librte_bbdev library. 
+ * + * @warning + * @b EXPERIMENTAL: this API may change without prior notice + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/**< Number of Virtual Functions FGPA 4G FEC supports */ +#define FPGA_LTE_FEC_NUM_VFS 8 + +/** + * Structure to pass FPGA 4G FEC configuration. + */ +struct fpga_lte_fec_conf { + /**< 1 if PF is used for dataplane, 0 for VFs */ + bool pf_mode_en; + /**< Number of UL queues per VF */ + uint8_t vf_ul_queues_number[FPGA_LTE_FEC_NUM_VFS]; + /**< Number of DL queues per VF */ + uint8_t vf_dl_queues_number[FPGA_LTE_FEC_NUM_VFS]; + /**< UL bandwidth. Needed for schedule algorithm */ + uint8_t ul_bandwidth; + /**< DL bandwidth. Needed for schedule algorithm */ + uint8_t dl_bandwidth; + /**< UL Load Balance */ + uint8_t ul_load_balance; + /**< DL Load Balance */ + uint8_t dl_load_balance; + /**< FLR timeout value */ + uint16_t flr_time_out; +}; + +/** + * Configure Intel(R) FPGA LTE FEC device + * + * @param dev_name + * The name of the device. This is the short form of PCI BDF, e.g. 00:01.0. + * It can also be retrieved for a bbdev device from the dev_name field in the + * rte_bbdev_info structure returned by rte_bbdev_info_get(). + * @param conf + * Configuration to apply to FPGA 4G FEC. + * + * @return + * Zero on success, negative value on failure. 
+ */ +__rte_experimental +int +fpga_lte_fec_configure(const char *dev_name, + const struct fpga_lte_fec_conf *conf); + +#ifdef __cplusplus +} +#endif + +#endif /* _FPGA_LTE_FEC_H_ */ diff --git a/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/meson.build b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/meson.build new file mode 100644 index 000000000..e00688da3 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/meson.build @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2019 Intel Corporation + +deps += ['bbdev', 'bus_vdev', 'ring', 'pci', 'bus_pci'] +sources = files('fpga_lte_fec.c') diff --git a/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/rte_pmd_bbdev_fpga_lte_fec_version.map b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/rte_pmd_bbdev_fpga_lte_fec_version.map new file mode 100644 index 000000000..6bcea2cc7 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/fpga_lte_fec/rte_pmd_bbdev_fpga_lte_fec_version.map @@ -0,0 +1,10 @@ +DPDK_20.0 { + local: *; +}; + +EXPERIMENTAL { + global: + + fpga_lte_fec_configure; + +}; diff --git a/src/spdk/dpdk/drivers/baseband/meson.build b/src/spdk/dpdk/drivers/baseband/meson.build new file mode 100644 index 000000000..4d909f9a6 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/meson.build @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Luca Boccassi <bluca@debian.org> + +drivers = ['null', 'turbo_sw', 'fpga_lte_fec', 'fpga_5gnr_fec'] + +config_flag_fmt = 'RTE_LIBRTE_PMD_BBDEV_@0@' +driver_name_fmt = 'rte_pmd_bbdev_@0@' diff --git a/src/spdk/dpdk/drivers/baseband/null/Makefile b/src/spdk/dpdk/drivers/baseband/null/Makefile new file mode 100644 index 000000000..0ee500166 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/null/Makefile @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk +# library name +LIB = librte_pmd_bbdev_null.a + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) 
+LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring -lrte_kvargs +LDLIBS += -lrte_bbdev +LDLIBS += -lrte_bus_vdev + +# versioning export map +EXPORT_MAP := rte_pmd_bbdev_null_version.map + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_NULL) += bbdev_null.c + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/baseband/null/bbdev_null.c b/src/spdk/dpdk/drivers/baseband/null/bbdev_null.c new file mode 100644 index 000000000..2f2515101 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/null/bbdev_null.c @@ -0,0 +1,356 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel Corporation + */ + +#include <string.h> + +#include <rte_common.h> +#include <rte_bus_vdev.h> +#include <rte_malloc.h> +#include <rte_ring.h> +#include <rte_kvargs.h> + +#include <rte_bbdev.h> +#include <rte_bbdev_pmd.h> + +#define DRIVER_NAME baseband_null + +/* NULL BBDev logging ID */ +static int bbdev_null_logtype; + +/* Helper macro for logging */ +#define rte_bbdev_log(level, fmt, ...) \ + rte_log(RTE_LOG_ ## level, bbdev_null_logtype, fmt "\n", ##__VA_ARGS__) + +#define rte_bbdev_log_debug(fmt, ...) 
\ + rte_bbdev_log(DEBUG, RTE_STR(__LINE__) ":%s() " fmt, __func__, \ + ##__VA_ARGS__) + +/* Initialisation params structure that can be used by null BBDEV driver */ +struct bbdev_null_params { + int socket_id; /*< Null BBDEV socket */ + uint16_t queues_num; /*< Null BBDEV queues number */ +}; + +/* Accecptable params for null BBDEV devices */ +#define BBDEV_NULL_MAX_NB_QUEUES_ARG "max_nb_queues" +#define BBDEV_NULL_SOCKET_ID_ARG "socket_id" + +static const char * const bbdev_null_valid_params[] = { + BBDEV_NULL_MAX_NB_QUEUES_ARG, + BBDEV_NULL_SOCKET_ID_ARG +}; + +/* private data structure */ +struct bbdev_private { + unsigned int max_nb_queues; /**< Max number of queues */ +}; + +/* queue */ +struct bbdev_queue { + struct rte_ring *processed_pkts; /* Ring for processed packets */ +} __rte_cache_aligned; + +/* Get device info */ +static void +info_get(struct rte_bbdev *dev, struct rte_bbdev_driver_info *dev_info) +{ + struct bbdev_private *internals = dev->data->dev_private; + + static const struct rte_bbdev_op_cap bbdev_capabilities[] = { + RTE_BBDEV_END_OF_CAPABILITIES_LIST(), + }; + + static struct rte_bbdev_queue_conf default_queue_conf = { + .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT, + }; + + default_queue_conf.socket = dev->data->socket_id; + + dev_info->driver_name = RTE_STR(DRIVER_NAME); + dev_info->max_num_queues = internals->max_nb_queues; + dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT; + dev_info->hardware_accelerated = false; + dev_info->max_dl_queue_priority = 0; + dev_info->max_ul_queue_priority = 0; + dev_info->default_queue_conf = default_queue_conf; + dev_info->capabilities = bbdev_capabilities; + dev_info->cpu_flag_reqs = NULL; + dev_info->min_alignment = 0; + + rte_bbdev_log_debug("got device info from %u", dev->data->dev_id); +} + +/* Release queue */ +static int +q_release(struct rte_bbdev *dev, uint16_t q_id) +{ + struct bbdev_queue *q = dev->data->queues[q_id].queue_private; + + if (q != NULL) { + rte_ring_free(q->processed_pkts); + 
rte_free(q); + dev->data->queues[q_id].queue_private = NULL; + } + + rte_bbdev_log_debug("released device queue %u:%u", + dev->data->dev_id, q_id); + return 0; +} + +/* Setup a queue */ +static int +q_setup(struct rte_bbdev *dev, uint16_t q_id, + const struct rte_bbdev_queue_conf *queue_conf) +{ + struct bbdev_queue *q; + char ring_name[RTE_RING_NAMESIZE]; + snprintf(ring_name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME) "%u:%u", + dev->data->dev_id, q_id); + + /* Allocate the queue data structure. */ + q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q), + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q == NULL) { + rte_bbdev_log(ERR, "Failed to allocate queue memory"); + return -ENOMEM; + } + + q->processed_pkts = rte_ring_create(ring_name, queue_conf->queue_size, + queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + if (q->processed_pkts == NULL) { + rte_bbdev_log(ERR, "Failed to create ring"); + goto free_q; + } + + dev->data->queues[q_id].queue_private = q; + rte_bbdev_log_debug("setup device queue %s", ring_name); + return 0; + +free_q: + rte_free(q); + return -EFAULT; +} + +static const struct rte_bbdev_ops pmd_ops = { + .info_get = info_get, + .queue_setup = q_setup, + .queue_release = q_release +}; + +/* Enqueue decode burst */ +static uint16_t +enqueue_dec_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t nb_ops) +{ + struct bbdev_queue *q = q_data->queue_private; + uint16_t nb_enqueued = rte_ring_enqueue_burst(q->processed_pkts, + (void **)ops, nb_ops, NULL); + + q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; + q_data->queue_stats.enqueued_count += nb_enqueued; + + return nb_enqueued; +} + +/* Enqueue encode burst */ +static uint16_t +enqueue_enc_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t nb_ops) +{ + struct bbdev_queue *q = q_data->queue_private; + uint16_t nb_enqueued = rte_ring_enqueue_burst(q->processed_pkts, + (void **)ops, nb_ops, NULL); + + 
q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; + q_data->queue_stats.enqueued_count += nb_enqueued; + + return nb_enqueued; +} + +/* Dequeue decode burst */ +static uint16_t +dequeue_dec_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t nb_ops) +{ + struct bbdev_queue *q = q_data->queue_private; + uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts, + (void **)ops, nb_ops, NULL); + q_data->queue_stats.dequeued_count += nb_dequeued; + + return nb_dequeued; +} + +/* Dequeue encode burst */ +static uint16_t +dequeue_enc_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t nb_ops) +{ + struct bbdev_queue *q = q_data->queue_private; + uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts, + (void **)ops, nb_ops, NULL); + q_data->queue_stats.dequeued_count += nb_dequeued; + + return nb_dequeued; +} + +/* Parse 16bit integer from string argument */ +static inline int +parse_u16_arg(const char *key, const char *value, void *extra_args) +{ + uint16_t *u16 = extra_args; + unsigned int long result; + + if ((value == NULL) || (extra_args == NULL)) + return -EINVAL; + errno = 0; + result = strtoul(value, NULL, 0); + if ((result >= (1 << 16)) || (errno != 0)) { + rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key); + return -ERANGE; + } + *u16 = (uint16_t)result; + return 0; +} + +/* Parse parameters used to create device */ +static int +parse_bbdev_null_params(struct bbdev_null_params *params, + const char *input_args) +{ + struct rte_kvargs *kvlist = NULL; + int ret = 0; + + if (params == NULL) + return -EINVAL; + if (input_args) { + kvlist = rte_kvargs_parse(input_args, bbdev_null_valid_params); + if (kvlist == NULL) + return -EFAULT; + + ret = rte_kvargs_process(kvlist, bbdev_null_valid_params[0], + &parse_u16_arg, ¶ms->queues_num); + if (ret < 0) + goto exit; + + ret = rte_kvargs_process(kvlist, bbdev_null_valid_params[1], + &parse_u16_arg, ¶ms->socket_id); + if (ret 
< 0) + goto exit; + + if (params->socket_id >= RTE_MAX_NUMA_NODES) { + rte_bbdev_log(ERR, "Invalid socket, must be < %u", + RTE_MAX_NUMA_NODES); + goto exit; + } + } + +exit: + if (kvlist) + rte_kvargs_free(kvlist); + return ret; +} + +/* Create device */ +static int +null_bbdev_create(struct rte_vdev_device *vdev, + struct bbdev_null_params *init_params) +{ + struct rte_bbdev *bbdev; + const char *name = rte_vdev_device_name(vdev); + + bbdev = rte_bbdev_allocate(name); + if (bbdev == NULL) + return -ENODEV; + + bbdev->data->dev_private = rte_zmalloc_socket(name, + sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE, + init_params->socket_id); + if (bbdev->data->dev_private == NULL) { + rte_bbdev_release(bbdev); + return -ENOMEM; + } + + bbdev->dev_ops = &pmd_ops; + bbdev->device = &vdev->device; + bbdev->data->socket_id = init_params->socket_id; + bbdev->intr_handle = NULL; + + /* register rx/tx burst functions for data path */ + bbdev->dequeue_enc_ops = dequeue_enc_ops; + bbdev->dequeue_dec_ops = dequeue_dec_ops; + bbdev->enqueue_enc_ops = enqueue_enc_ops; + bbdev->enqueue_dec_ops = enqueue_dec_ops; + ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues = + init_params->queues_num; + + return 0; +} + +/* Initialise device */ +static int +null_bbdev_probe(struct rte_vdev_device *vdev) +{ + struct bbdev_null_params init_params = { + rte_socket_id(), + RTE_BBDEV_DEFAULT_MAX_NB_QUEUES + }; + const char *name; + const char *input_args; + + if (vdev == NULL) + return -EINVAL; + + name = rte_vdev_device_name(vdev); + if (name == NULL) + return -EINVAL; + + input_args = rte_vdev_device_args(vdev); + parse_bbdev_null_params(&init_params, input_args); + + rte_bbdev_log_debug("Init %s on NUMA node %d with max queues: %d", + name, init_params.socket_id, init_params.queues_num); + + return null_bbdev_create(vdev, &init_params); +} + +/* Uninitialise device */ +static int +null_bbdev_remove(struct rte_vdev_device *vdev) +{ + struct rte_bbdev *bbdev; + const char 
*name; + + if (vdev == NULL) + return -EINVAL; + + name = rte_vdev_device_name(vdev); + if (name == NULL) + return -EINVAL; + + bbdev = rte_bbdev_get_named_dev(name); + if (bbdev == NULL) + return -EINVAL; + + rte_free(bbdev->data->dev_private); + + return rte_bbdev_release(bbdev); +} + +static struct rte_vdev_driver bbdev_null_pmd_drv = { + .probe = null_bbdev_probe, + .remove = null_bbdev_remove +}; + +RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_null_pmd_drv); +RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME, + BBDEV_NULL_MAX_NB_QUEUES_ARG"=<int> " + BBDEV_NULL_SOCKET_ID_ARG"=<int>"); +RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, bbdev_null); + +RTE_INIT(null_bbdev_init_log) +{ + bbdev_null_logtype = rte_log_register("pmd.bb.null"); + if (bbdev_null_logtype >= 0) + rte_log_set_level(bbdev_null_logtype, RTE_LOG_NOTICE); +} diff --git a/src/spdk/dpdk/drivers/baseband/null/meson.build b/src/spdk/dpdk/drivers/baseband/null/meson.build new file mode 100644 index 000000000..02ef7db57 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/null/meson.build @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Luca Boccassi <bluca@debian.org> + +deps += ['bbdev', 'bus_vdev', 'ring'] +sources = files('bbdev_null.c') diff --git a/src/spdk/dpdk/drivers/baseband/null/rte_pmd_bbdev_null_version.map b/src/spdk/dpdk/drivers/baseband/null/rte_pmd_bbdev_null_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/null/rte_pmd_bbdev_null_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; diff --git a/src/spdk/dpdk/drivers/baseband/turbo_sw/Makefile b/src/spdk/dpdk/drivers/baseband/turbo_sw/Makefile new file mode 100644 index 000000000..c2a6fe0f8 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/turbo_sw/Makefile @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2017 Intel Corporation + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_pmd_bbdev_turbo_sw.a + +# build flags 
+CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring -lrte_kvargs +LDLIBS += -lrte_bbdev +LDLIBS += -lrte_bus_vdev + +# versioning export map +EXPORT_MAP := rte_pmd_bbdev_turbo_sw_version.map + +# external library dependencies if available +ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX2),y) +ifeq ($(FLEXRAN_SDK),) +$(error "Please define FLEXRAN_SDK environment variable") +endif +CFLAGS += -I$(FLEXRAN_SDK)/lib_common +CFLAGS += -I$(FLEXRAN_SDK)/lib_turbo +CFLAGS += -I$(FLEXRAN_SDK)/lib_crc +CFLAGS += -I$(FLEXRAN_SDK)/lib_rate_matching +LDLIBS += -L$(FLEXRAN_SDK)/lib_turbo -lturbo +LDLIBS += -L$(FLEXRAN_SDK)/lib_crc -lcrc +LDLIBS += -L$(FLEXRAN_SDK)/lib_rate_matching -lrate_matching +LDLIBS += -L$(FLEXRAN_SDK)/lib_common -lcommon +LDLIBS += -lstdc++ -lirc -limf -lipps -lsvml +endif + +ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX512),y) +ifeq ($(CONFIG_RTE_BBDEV_SDK_AVX2),n) +$(error "CONFIG_RTE_BBDEV_SDK_AVX512 requires CONFIG_RTE_BBDEV_SDK_AVX2 set") +endif +CFLAGS += -I$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr +CFLAGS += -I$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr +CFLAGS += -I$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr +CFLAGS += -I$(FLEXRAN_SDK)/lib_rate_dematching_5gnr +LDLIBS += -L$(FLEXRAN_SDK)/lib_ldpc_encoder_5gnr -lldpc_encoder_5gnr +LDLIBS += -L$(FLEXRAN_SDK)/lib_ldpc_decoder_5gnr -lldpc_decoder_5gnr +LDLIBS += -L$(FLEXRAN_SDK)/lib_LDPC_ratematch_5gnr -lLDPC_ratematch_5gnr +LDLIBS += -L$(FLEXRAN_SDK)/lib_rate_dematching_5gnr -lrate_dematching_5gnr +endif + +# library source files +SRCS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_TURBO_SW) += bbdev_turbo_software.c + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/src/spdk/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c b/src/spdk/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c new file mode 100644 index 000000000..bb62276b9 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/turbo_sw/bbdev_turbo_software.c @@ -0,0 +1,1999 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2017 Intel 
/* private data structure */
struct bbdev_private {
	unsigned int max_nb_queues;  /**< Max number of queues */
};

/* Initialisation params structure that can be used by Turbo SW driver */
struct turbo_sw_params {
	int socket_id;  /**< Turbo SW device socket */
	uint16_t queues_num;  /**< Turbo SW device queues number */
};

/* Acceptable params for Turbo SW devices */
#define TURBO_SW_MAX_NB_QUEUES_ARG  "max_nb_queues"
#define TURBO_SW_SOCKET_ID_ARG      "socket_id"

/* Table of accepted device-argument keys, NULL-terminated.
 * NOTE(review): presumably handed to rte_kvargs_parse() by the argument
 * parser further down the file, which requires the trailing NULL; confirm
 * that nothing there depends on the element count of this array.
 */
static const char * const turbo_sw_valid_params[] = {
	TURBO_SW_MAX_NB_QUEUES_ARG,
	TURBO_SW_SOCKET_ID_ARG,
	NULL
};
#ifdef RTE_BBDEV_SDK_AVX2
/* Claim len bytes at the tail of segment m.
 *
 * Grows m's data length and the packet length recorded in m_head by len
 * and returns a pointer to the first claimed byte, or NULL (changing
 * nothing) when m has less than len bytes of tailroom.
 */
static inline char *
mbuf_append(struct rte_mbuf *m_head, struct rte_mbuf *m, uint16_t len)
{
	if (unlikely(len > rte_pktmbuf_tailroom(m)))
		return NULL;

	char *tail = (char *)m->buf_addr + m->data_off + m->data_len;
	m->data_len = (uint16_t)(m->data_len + len);
	m_head->pkt_len = (m_head->pkt_len + len);
	return tail;
}

/* Calculate index based on Table 5.1.3-3 from TS 36.212.
 *
 * Valid code block sizes lie on a grid whose step grows with K:
 * 8 up to 512, 16 up to 1024, 32 up to 2048 and 64 above that.
 *
 * Returns the 1-based table index for k, or -1 when k is outside
 * [RTE_BBDEV_TURBO_MIN_CB_SIZE, RTE_BBDEV_TURBO_MAX_CB_SIZE] or does not
 * sit on the grid of its range. Each branch returns directly so that the
 * -1 for an off-grid k can no longer be overwritten by the index
 * computation.
 */
static inline int32_t
compute_idx(uint16_t k)
{
	if (k < RTE_BBDEV_TURBO_MIN_CB_SIZE || k > RTE_BBDEV_TURBO_MAX_CB_SIZE)
		return -1;

	if (k <= 512) {
		if ((k - 40) % 8 != 0)
			return -1;
		return (k - 40) / 8 + 1;
	}

	if (k <= 1024) {
		if ((k - 512) % 16 != 0)
			return -1;
		return 60 + (k - 512) / 16;
	}

	if (k <= 2048) {
		if ((k - 1024) % 32 != 0)
			return -1;
		return 92 + (k - 1024) / 32;
	}

	/* 2048 < k <= RTE_BBDEV_TURBO_MAX_CB_SIZE */
	if ((k - 2048) % 64 != 0)
		return -1;
	return 124 + (k - 2048) / 64;
}
#endif

/* Read flag value 0/1 from bitmap: true when any bit selected by bitmask
 * is set in bitmap.
 */
static inline bool
check_bit(uint32_t bitmap, uint32_t bitmask)
{
	return (bitmap & bitmask) != 0;
}
bbdev_private *internals = dev->data->dev_private; + + static const struct rte_bbdev_op_cap bbdev_capabilities[] = { +#ifdef RTE_BBDEV_SDK_AVX2 + { + .type = RTE_BBDEV_OP_TURBO_DEC, + .cap.turbo_dec = { + .capability_flags = + RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE | + RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN | + RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN | + RTE_BBDEV_TURBO_CRC_TYPE_24B | + RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP | + RTE_BBDEV_TURBO_EARLY_TERMINATION, + .max_llr_modulus = 16, + .num_buffers_src = + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, + .num_buffers_hard_out = + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, + .num_buffers_soft_out = 0, + } + }, + { + .type = RTE_BBDEV_OP_TURBO_ENC, + .cap.turbo_enc = { + .capability_flags = + RTE_BBDEV_TURBO_CRC_24B_ATTACH | + RTE_BBDEV_TURBO_CRC_24A_ATTACH | + RTE_BBDEV_TURBO_RATE_MATCH | + RTE_BBDEV_TURBO_RV_INDEX_BYPASS, + .num_buffers_src = + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, + .num_buffers_dst = + RTE_BBDEV_TURBO_MAX_CODE_BLOCKS, + } + }, +#endif +#ifdef RTE_BBDEV_SDK_AVX512 + { + .type = RTE_BBDEV_OP_LDPC_ENC, + .cap.ldpc_enc = { + .capability_flags = + RTE_BBDEV_LDPC_RATE_MATCH | + RTE_BBDEV_LDPC_CRC_24A_ATTACH | + RTE_BBDEV_LDPC_CRC_24B_ATTACH, + .num_buffers_src = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_dst = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + } + }, + { + .type = RTE_BBDEV_OP_LDPC_DEC, + .cap.ldpc_dec = { + .capability_flags = + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK | + RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK | + RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP | + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE | + RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE | + RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE, + .llr_size = 8, + .llr_decimals = 4, + .num_buffers_src = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_hard_out = + RTE_BBDEV_LDPC_MAX_CODE_BLOCKS, + .num_buffers_soft_out = 0, + } + }, +#endif + RTE_BBDEV_END_OF_CAPABILITIES_LIST() + }; + + static struct rte_bbdev_queue_conf default_queue_conf = { + .queue_size = RTE_BBDEV_QUEUE_SIZE_LIMIT, + }; +#ifdef RTE_BBDEV_SDK_AVX2 + 
static const enum rte_cpu_flag_t cpu_flag = RTE_CPUFLAG_SSE4_2; + dev_info->cpu_flag_reqs = &cpu_flag; +#else + dev_info->cpu_flag_reqs = NULL; +#endif + default_queue_conf.socket = dev->data->socket_id; + + dev_info->driver_name = RTE_STR(DRIVER_NAME); + dev_info->max_num_queues = internals->max_nb_queues; + dev_info->queue_size_lim = RTE_BBDEV_QUEUE_SIZE_LIMIT; + dev_info->hardware_accelerated = false; + dev_info->max_dl_queue_priority = 0; + dev_info->max_ul_queue_priority = 0; + dev_info->default_queue_conf = default_queue_conf; + dev_info->capabilities = bbdev_capabilities; + dev_info->min_alignment = 64; + dev_info->harq_buffer_size = 0; + + rte_bbdev_log_debug("got device info from %u\n", dev->data->dev_id); +} + +/* Release queue */ +static int +q_release(struct rte_bbdev *dev, uint16_t q_id) +{ + struct turbo_sw_queue *q = dev->data->queues[q_id].queue_private; + + if (q != NULL) { + rte_ring_free(q->processed_pkts); + rte_free(q->enc_out); + rte_free(q->enc_in); + rte_free(q->ag); + rte_free(q->code_block); + rte_free(q->deint_input); + rte_free(q->deint_output); + rte_free(q->adapter_output); + rte_free(q); + dev->data->queues[q_id].queue_private = NULL; + } + + rte_bbdev_log_debug("released device queue %u:%u", + dev->data->dev_id, q_id); + return 0; +} + +/* Setup a queue */ +static int +q_setup(struct rte_bbdev *dev, uint16_t q_id, + const struct rte_bbdev_queue_conf *queue_conf) +{ + int ret; + struct turbo_sw_queue *q; + char name[RTE_RING_NAMESIZE]; + + /* Allocate the queue data structure. */ + q = rte_zmalloc_socket(RTE_STR(DRIVER_NAME), sizeof(*q), + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q == NULL) { + rte_bbdev_log(ERR, "Failed to allocate queue memory"); + return -ENOMEM; + } + + /* Allocate memory for encoder output. 
*/ + ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_enc_o%u:%u", + dev->data->dev_id, q_id); + if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { + rte_bbdev_log(ERR, + "Creating queue name for device %u queue %u failed", + dev->data->dev_id, q_id); + return -ENAMETOOLONG; + } + q->enc_out = rte_zmalloc_socket(name, + ((RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) + 3) * + sizeof(*q->enc_out) * 3, + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q->enc_out == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate queue memory for %s", name); + goto free_q; + } + + /* Allocate memory for rate matching output. */ + ret = snprintf(name, RTE_RING_NAMESIZE, + RTE_STR(DRIVER_NAME)"_enc_i%u:%u", dev->data->dev_id, + q_id); + if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { + rte_bbdev_log(ERR, + "Creating queue name for device %u queue %u failed", + dev->data->dev_id, q_id); + return -ENAMETOOLONG; + } + q->enc_in = rte_zmalloc_socket(name, + (RTE_BBDEV_LDPC_MAX_CB_SIZE >> 3) * sizeof(*q->enc_in), + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q->enc_in == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate queue memory for %s", name); + goto free_q; + } + + /* Allocate memory for Alpha Gamma temp buffer. */ + ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_ag%u:%u", + dev->data->dev_id, q_id); + if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { + rte_bbdev_log(ERR, + "Creating queue name for device %u queue %u failed", + dev->data->dev_id, q_id); + return -ENAMETOOLONG; + } + q->ag = rte_zmalloc_socket(name, + RTE_BBDEV_TURBO_MAX_CB_SIZE * 10 * sizeof(*q->ag), + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q->ag == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate queue memory for %s", name); + goto free_q; + } + + /* Allocate memory for code block temp buffer. 
*/ + ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"_cb%u:%u", + dev->data->dev_id, q_id); + if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { + rte_bbdev_log(ERR, + "Creating queue name for device %u queue %u failed", + dev->data->dev_id, q_id); + return -ENAMETOOLONG; + } + q->code_block = rte_zmalloc_socket(name, + RTE_BBDEV_TURBO_MAX_CB_SIZE * sizeof(*q->code_block), + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q->code_block == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate queue memory for %s", name); + goto free_q; + } + + /* Allocate memory for Deinterleaver input. */ + ret = snprintf(name, RTE_RING_NAMESIZE, + RTE_STR(DRIVER_NAME)"_de_i%u:%u", + dev->data->dev_id, q_id); + if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { + rte_bbdev_log(ERR, + "Creating queue name for device %u queue %u failed", + dev->data->dev_id, q_id); + return -ENAMETOOLONG; + } + q->deint_input = rte_zmalloc_socket(name, + DEINT_INPUT_BUF_SIZE * sizeof(*q->deint_input), + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q->deint_input == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate queue memory for %s", name); + goto free_q; + } + + /* Allocate memory for Deinterleaver output. */ + ret = snprintf(name, RTE_RING_NAMESIZE, + RTE_STR(DRIVER_NAME)"_de_o%u:%u", + dev->data->dev_id, q_id); + if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { + rte_bbdev_log(ERR, + "Creating queue name for device %u queue %u failed", + dev->data->dev_id, q_id); + return -ENAMETOOLONG; + } + q->deint_output = rte_zmalloc_socket(NULL, + DEINT_OUTPUT_BUF_SIZE * sizeof(*q->deint_output), + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q->deint_output == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate queue memory for %s", name); + goto free_q; + } + + /* Allocate memory for Adapter output. 
*/ + ret = snprintf(name, RTE_RING_NAMESIZE, + RTE_STR(DRIVER_NAME)"_ada_o%u:%u", + dev->data->dev_id, q_id); + if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { + rte_bbdev_log(ERR, + "Creating queue name for device %u queue %u failed", + dev->data->dev_id, q_id); + return -ENAMETOOLONG; + } + q->adapter_output = rte_zmalloc_socket(NULL, + ADAPTER_OUTPUT_BUF_SIZE * sizeof(*q->adapter_output), + RTE_CACHE_LINE_SIZE, queue_conf->socket); + if (q->adapter_output == NULL) { + rte_bbdev_log(ERR, + "Failed to allocate queue memory for %s", name); + goto free_q; + } + + /* Create ring for packets awaiting to be dequeued. */ + ret = snprintf(name, RTE_RING_NAMESIZE, RTE_STR(DRIVER_NAME)"%u:%u", + dev->data->dev_id, q_id); + if ((ret < 0) || (ret >= (int)RTE_RING_NAMESIZE)) { + rte_bbdev_log(ERR, + "Creating queue name for device %u queue %u failed", + dev->data->dev_id, q_id); + return -ENAMETOOLONG; + } + q->processed_pkts = rte_ring_create(name, queue_conf->queue_size, + queue_conf->socket, RING_F_SP_ENQ | RING_F_SC_DEQ); + if (q->processed_pkts == NULL) { + rte_bbdev_log(ERR, "Failed to create ring for %s", name); + goto free_q; + } + + q->type = queue_conf->op_type; + + dev->data->queues[q_id].queue_private = q; + rte_bbdev_log_debug("setup device queue %s", name); + return 0; + +free_q: + rte_ring_free(q->processed_pkts); + rte_free(q->enc_out); + rte_free(q->enc_in); + rte_free(q->ag); + rte_free(q->code_block); + rte_free(q->deint_input); + rte_free(q->deint_output); + rte_free(q->adapter_output); + rte_free(q); + return -EFAULT; +} + +static const struct rte_bbdev_ops pmd_ops = { + .info_get = info_get, + .queue_setup = q_setup, + .queue_release = q_release +}; + +#ifdef RTE_BBDEV_SDK_AVX2 +#ifdef RTE_LIBRTE_BBDEV_DEBUG +/* Checks if the encoder input buffer is correct. + * Returns 0 if it's valid, -1 otherwise. 
+ */ +static inline int +is_enc_input_valid(const uint16_t k, const int32_t k_idx, + const uint16_t in_length) +{ + if (k_idx < 0) { + rte_bbdev_log(ERR, "K Index is invalid"); + return -1; + } + + if (in_length - (k >> 3) < 0) { + rte_bbdev_log(ERR, + "Mismatch between input length (%u bytes) and K (%u bits)", + in_length, k); + return -1; + } + + if (k > RTE_BBDEV_TURBO_MAX_CB_SIZE) { + rte_bbdev_log(ERR, "CB size (%u) is too big, max: %d", + k, RTE_BBDEV_TURBO_MAX_CB_SIZE); + return -1; + } + + return 0; +} + +/* Checks if the decoder input buffer is correct. + * Returns 0 if it's valid, -1 otherwise. + */ +static inline int +is_dec_input_valid(int32_t k_idx, int16_t kw, int16_t in_length) +{ + if (k_idx < 0) { + rte_bbdev_log(ERR, "K index is invalid"); + return -1; + } + + if (in_length < kw) { + rte_bbdev_log(ERR, + "Mismatch between input length (%u) and kw (%u)", + in_length, kw); + return -1; + } + + if (kw > RTE_BBDEV_TURBO_MAX_KW) { + rte_bbdev_log(ERR, "Input length (%u) is too big, max: %d", + kw, RTE_BBDEV_TURBO_MAX_KW); + return -1; + } + + return 0; +} +#endif +#endif + +static inline void +process_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, + uint8_t r, uint8_t c, uint16_t k, uint16_t ncb, + uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, + struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, + uint16_t in_length, struct rte_bbdev_stats *q_stats) +{ +#ifdef RTE_BBDEV_SDK_AVX2 +#ifdef RTE_LIBRTE_BBDEV_DEBUG + int ret; +#else + RTE_SET_USED(in_length); +#endif + int16_t k_idx; + uint16_t m; + uint8_t *in, *out0, *out1, *out2, *tmp_out, *rm_out; + uint64_t first_3_bytes = 0; + struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc; + struct bblib_crc_request crc_req; + struct bblib_crc_response crc_resp; + struct bblib_turbo_encoder_request turbo_req; + struct bblib_turbo_encoder_response turbo_resp; + struct bblib_rate_match_dl_request rm_req; + struct bblib_rate_match_dl_response rm_resp; +#ifdef 
RTE_BBDEV_OFFLOAD_COST + uint64_t start_time; +#else + RTE_SET_USED(q_stats); +#endif + + k_idx = compute_idx(k); + in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); + + /* CRC24A (for TB) */ + if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH) && + (enc->code_block_mode == 1)) { +#ifdef RTE_LIBRTE_BBDEV_DEBUG + ret = is_enc_input_valid(k - 24, k_idx, in_length); + if (ret != 0) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + return; + } +#endif + + crc_req.data = in; + crc_req.len = k - 24; + /* Check if there is a room for CRC bits if not use + * the temporary buffer. + */ + if (mbuf_append(m_in, m_in, 3) == NULL) { + rte_memcpy(q->enc_in, in, (k - 24) >> 3); + in = q->enc_in; + } else { + /* Store 3 first bytes of next CB as they will be + * overwritten by CRC bytes. If it is the last CB then + * there is no point to store 3 next bytes and this + * if..else branch will be omitted. + */ + first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]); + } + + crc_resp.data = in; +#ifdef RTE_BBDEV_OFFLOAD_COST + start_time = rte_rdtsc_precise(); +#endif + /* CRC24A generation */ + bblib_lte_crc24a_gen(&crc_req, &crc_resp); +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + } else if (enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) { + /* CRC24B */ +#ifdef RTE_LIBRTE_BBDEV_DEBUG + ret = is_enc_input_valid(k - 24, k_idx, in_length); + if (ret != 0) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + return; + } +#endif + + crc_req.data = in; + crc_req.len = k - 24; + /* Check if there is a room for CRC bits if this is the last + * CB in TB. If not use temporary buffer. + */ + if ((c - r == 1) && (mbuf_append(m_in, m_in, 3) == NULL)) { + rte_memcpy(q->enc_in, in, (k - 24) >> 3); + in = q->enc_in; + } else if (c - r > 1) { + /* Store 3 first bytes of next CB as they will be + * overwritten by CRC bytes. If it is the last CB then + * there is no point to store 3 next bytes and this + * if..else branch will be omitted. 
+ */ + first_3_bytes = *((uint64_t *)&in[(k - 32) >> 3]); + } + + crc_resp.data = in; +#ifdef RTE_BBDEV_OFFLOAD_COST + start_time = rte_rdtsc_precise(); +#endif + /* CRC24B generation */ + bblib_lte_crc24b_gen(&crc_req, &crc_resp); +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + } +#ifdef RTE_LIBRTE_BBDEV_DEBUG + else { + ret = is_enc_input_valid(k, k_idx, in_length); + if (ret != 0) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + return; + } + } +#endif + + /* Turbo encoder */ + + /* Each bit layer output from turbo encoder is (k+4) bits long, i.e. + * input length + 4 tail bits. That's (k/8) + 1 bytes after rounding up. + * So dst_data's length should be 3*(k/8) + 3 bytes. + * In Rate-matching bypass case outputs pointers passed to encoder + * (out0, out1 and out2) can directly point to addresses of output from + * turbo_enc entity. + */ + if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) { + out0 = q->enc_out; + out1 = RTE_PTR_ADD(out0, (k >> 3) + 1); + out2 = RTE_PTR_ADD(out1, (k >> 3) + 1); + } else { + out0 = (uint8_t *)mbuf_append(m_out_head, m_out, + (k >> 3) * 3 + 2); + if (out0 == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Too little space in output mbuf"); + return; + } + enc->output.length += (k >> 3) * 3 + 2; + /* rte_bbdev_op_data.offset can be different than the + * offset of the appended bytes + */ + out0 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); + out1 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, + out_offset + (k >> 3) + 1); + out2 = rte_pktmbuf_mtod_offset(m_out, uint8_t *, + out_offset + 2 * ((k >> 3) + 1)); + } + + turbo_req.case_id = k_idx; + turbo_req.input_win = in; + turbo_req.length = k >> 3; + turbo_resp.output_win_0 = out0; + turbo_resp.output_win_1 = out1; + turbo_resp.output_win_2 = out2; + +#ifdef RTE_BBDEV_OFFLOAD_COST + start_time = rte_rdtsc_precise(); +#endif + /* Turbo encoding */ + if (bblib_turbo_encoder(&turbo_req, 
&turbo_resp) != 0) { + op->status |= 1 << RTE_BBDEV_DRV_ERROR; + rte_bbdev_log(ERR, "Turbo Encoder failed"); + return; + } +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + + /* Restore 3 first bytes of next CB if they were overwritten by CRC*/ + if (first_3_bytes != 0) + *((uint64_t *)&in[(k - 32) >> 3]) = first_3_bytes; + + /* Rate-matching */ + if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) { + uint8_t mask_id; + /* Integer round up division by 8 */ + uint16_t out_len = (e + 7) >> 3; + /* The mask array is indexed using E%8. E is an even number so + * there are only 4 possible values. + */ + const uint8_t mask_out[] = {0xFF, 0xC0, 0xF0, 0xFC}; + + /* get output data starting address */ + rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len); + if (rm_out == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Too little space in output mbuf"); + return; + } + /* rte_bbdev_op_data.offset can be different than the offset + * of the appended bytes + */ + rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); + + /* index of current code block */ + rm_req.r = r; + /* total number of code block */ + rm_req.C = c; + /* For DL - 1, UL - 0 */ + rm_req.direction = 1; + /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nsoft, KMIMO + * and MDL_HARQ are used for Ncb calculation. As Ncb is already + * known we can adjust those parameters + */ + rm_req.Nsoft = ncb * rm_req.C; + rm_req.KMIMO = 1; + rm_req.MDL_HARQ = 1; + /* According to 3ggp 36.212 Spec 5.1.4.1.2 section Nl, Qm and G + * are used for E calculation. 
As E is already known we can + * adjust those parameters + */ + rm_req.NL = e; + rm_req.Qm = 1; + rm_req.G = rm_req.NL * rm_req.Qm * rm_req.C; + + rm_req.rvidx = enc->rv_index; + rm_req.Kidx = k_idx - 1; + rm_req.nLen = k + 4; + rm_req.tin0 = out0; + rm_req.tin1 = out1; + rm_req.tin2 = out2; + rm_resp.output = rm_out; + rm_resp.OutputLen = out_len; + if (enc->op_flags & RTE_BBDEV_TURBO_RV_INDEX_BYPASS) + rm_req.bypass_rvidx = 1; + else + rm_req.bypass_rvidx = 0; + +#ifdef RTE_BBDEV_OFFLOAD_COST + start_time = rte_rdtsc_precise(); +#endif + /* Rate-Matching */ + if (bblib_rate_match_dl(&rm_req, &rm_resp) != 0) { + op->status |= 1 << RTE_BBDEV_DRV_ERROR; + rte_bbdev_log(ERR, "Rate matching failed"); + return; + } +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + + /* SW fills an entire last byte even if E%8 != 0. Clear the + * superfluous data bits for consistency with HW device. + */ + mask_id = (e & 7) >> 1; + rm_out[out_len - 1] &= mask_out[mask_id]; + enc->output.length += rm_resp.OutputLen; + } else { + /* Rate matching is bypassed */ + + /* Completing last byte of out0 (where 4 tail bits are stored) + * by moving first 4 bits from out1 + */ + tmp_out = (uint8_t *) --out1; + *tmp_out = *tmp_out | ((*(tmp_out + 1) & 0xF0) >> 4); + tmp_out++; + /* Shifting out1 data by 4 bits to the left */ + for (m = 0; m < k >> 3; ++m) { + uint8_t *first = tmp_out; + uint8_t second = *(tmp_out + 1); + *first = (*first << 4) | ((second & 0xF0) >> 4); + tmp_out++; + } + /* Shifting out2 data by 8 bits to the left */ + for (m = 0; m < (k >> 3) + 1; ++m) { + *tmp_out = *(tmp_out + 1); + tmp_out++; + } + *tmp_out = 0; + } +#else + RTE_SET_USED(q); + RTE_SET_USED(op); + RTE_SET_USED(r); + RTE_SET_USED(c); + RTE_SET_USED(k); + RTE_SET_USED(ncb); + RTE_SET_USED(e); + RTE_SET_USED(m_in); + RTE_SET_USED(m_out_head); + RTE_SET_USED(m_out); + RTE_SET_USED(in_offset); + RTE_SET_USED(out_offset); + RTE_SET_USED(in_length); + 
RTE_SET_USED(q_stats); +#endif +} + + +static inline void +process_ldpc_enc_cb(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, + uint32_t e, struct rte_mbuf *m_in, struct rte_mbuf *m_out_head, + struct rte_mbuf *m_out, uint16_t in_offset, uint16_t out_offset, + uint16_t seg_total_left, struct rte_bbdev_stats *q_stats) +{ +#ifdef RTE_BBDEV_SDK_AVX512 + RTE_SET_USED(seg_total_left); + uint8_t *in, *rm_out; + struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; + struct bblib_ldpc_encoder_5gnr_request ldpc_req; + struct bblib_ldpc_encoder_5gnr_response ldpc_resp; + struct bblib_LDPC_ratematch_5gnr_request rm_req; + struct bblib_LDPC_ratematch_5gnr_response rm_resp; + struct bblib_crc_request crc_req; + struct bblib_crc_response crc_resp; + uint16_t msgLen, puntBits, parity_offset, out_len; + uint16_t K = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; + uint16_t in_length_in_bits = K - enc->n_filler; + uint16_t in_length_in_bytes = (in_length_in_bits + 7) >> 3; + +#ifdef RTE_BBDEV_OFFLOAD_COST + uint64_t start_time = rte_rdtsc_precise(); +#else + RTE_SET_USED(q_stats); +#endif + + in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); + + /* Masking the Filler bits explicitly */ + memset(q->enc_in + (in_length_in_bytes - 3), 0, + ((K + 7) >> 3) - (in_length_in_bytes - 3)); + /* CRC Generation */ + if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24A_ATTACH) { + rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); + crc_req.data = in; + crc_req.len = in_length_in_bits - 24; + crc_resp.data = q->enc_in; + bblib_lte_crc24a_gen(&crc_req, &crc_resp); + } else if (enc->op_flags & RTE_BBDEV_LDPC_CRC_24B_ATTACH) { + rte_memcpy(q->enc_in, in, in_length_in_bytes - 3); + crc_req.data = in; + crc_req.len = in_length_in_bits - 24; + crc_resp.data = q->enc_in; + bblib_lte_crc24b_gen(&crc_req, &crc_resp); + } else + rte_memcpy(q->enc_in, in, in_length_in_bytes); + + /* LDPC Encoding */ + ldpc_req.Zc = enc->z_c; + ldpc_req.baseGraph = enc->basegraph; + /* Number of rows set to maximum */ + 
ldpc_req.nRows = ldpc_req.baseGraph == 1 ? 46 : 42; + ldpc_req.numberCodeblocks = 1; + ldpc_req.input[0] = (int8_t *) q->enc_in; + ldpc_resp.output[0] = (int8_t *) q->enc_out; + + bblib_bit_reverse(ldpc_req.input[0], in_length_in_bytes << 3); + + if (bblib_ldpc_encoder_5gnr(&ldpc_req, &ldpc_resp) != 0) { + op->status |= 1 << RTE_BBDEV_DRV_ERROR; + rte_bbdev_log(ERR, "LDPC Encoder failed"); + return; + } + + /* + * Systematic + Parity : Recreating stream with filler bits, ideally + * the bit select could handle this in the RM SDK + */ + msgLen = (ldpc_req.baseGraph == 1 ? 22 : 10) * ldpc_req.Zc; + puntBits = 2 * ldpc_req.Zc; + parity_offset = msgLen - puntBits; + ippsCopyBE_1u(((uint8_t *) ldpc_req.input[0]) + (puntBits / 8), + puntBits%8, q->adapter_output, 0, parity_offset); + ippsCopyBE_1u(q->enc_out, 0, q->adapter_output + (parity_offset / 8), + parity_offset % 8, ldpc_req.nRows * ldpc_req.Zc); + + out_len = (e + 7) >> 3; + /* get output data starting address */ + rm_out = (uint8_t *)mbuf_append(m_out_head, m_out, out_len); + if (rm_out == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Too little space in output mbuf"); + return; + } + /* + * rte_bbdev_op_data.offset can be different than the offset + * of the appended bytes + */ + rm_out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); + + /* Rate-Matching */ + rm_req.E = e; + rm_req.Ncb = enc->n_cb; + rm_req.Qm = enc->q_m; + rm_req.Zc = enc->z_c; + rm_req.baseGraph = enc->basegraph; + rm_req.input = q->adapter_output; + rm_req.nLen = enc->n_filler; + rm_req.nullIndex = parity_offset - enc->n_filler; + rm_req.rvidx = enc->rv_index; + rm_resp.output = q->deint_output; + + if (bblib_LDPC_ratematch_5gnr(&rm_req, &rm_resp) != 0) { + op->status |= 1 << RTE_BBDEV_DRV_ERROR; + rte_bbdev_log(ERR, "Rate matching failed"); + return; + } + + /* RM SDK may provide non zero bits on last byte */ + if ((e % 8) != 0) + q->deint_output[out_len-1] &= (1 << (e % 8)) - 1; + + 
bblib_bit_reverse((int8_t *) q->deint_output, out_len << 3); + + rte_memcpy(rm_out, q->deint_output, out_len); + enc->output.length += out_len; + +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif +#else + RTE_SET_USED(q); + RTE_SET_USED(op); + RTE_SET_USED(e); + RTE_SET_USED(m_in); + RTE_SET_USED(m_out_head); + RTE_SET_USED(m_out); + RTE_SET_USED(in_offset); + RTE_SET_USED(out_offset); + RTE_SET_USED(seg_total_left); + RTE_SET_USED(q_stats); +#endif +} + +static inline void +enqueue_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, + struct rte_bbdev_stats *queue_stats) +{ + uint8_t c, r, crc24_bits = 0; + uint16_t k, ncb; + uint32_t e; + struct rte_bbdev_op_turbo_enc *enc = &op->turbo_enc; + uint16_t in_offset = enc->input.offset; + uint16_t out_offset = enc->output.offset; + struct rte_mbuf *m_in = enc->input.data; + struct rte_mbuf *m_out = enc->output.data; + struct rte_mbuf *m_out_head = enc->output.data; + uint32_t in_length, mbuf_total_left = enc->input.length; + uint16_t seg_total_left; + + /* Clear op status */ + op->status = 0; + + if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { + rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", + mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if (m_in == NULL || m_out == NULL) { + rte_bbdev_log(ERR, "Invalid mbuf pointer"); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || + (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) + crc24_bits = 24; + + if (enc->code_block_mode == 0) { /* For Transport Block mode */ + c = enc->tb_params.c; + r = enc->tb_params.r; + } else {/* For Code Block mode */ + c = 1; + r = 0; + } + + while (mbuf_total_left > 0 && r < c) { + + seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; + + if (enc->code_block_mode == 0) { + k = (r < enc->tb_params.c_neg) ? 
+ enc->tb_params.k_neg : enc->tb_params.k_pos; + ncb = (r < enc->tb_params.c_neg) ? + enc->tb_params.ncb_neg : enc->tb_params.ncb_pos; + e = (r < enc->tb_params.cab) ? + enc->tb_params.ea : enc->tb_params.eb; + } else { + k = enc->cb_params.k; + ncb = enc->cb_params.ncb; + e = enc->cb_params.e; + } + + process_enc_cb(q, op, r, c, k, ncb, e, m_in, m_out_head, + m_out, in_offset, out_offset, seg_total_left, + queue_stats); + /* Update total_left */ + in_length = ((k - crc24_bits) >> 3); + mbuf_total_left -= in_length; + /* Update offsets for next CBs (if exist) */ + in_offset += (k - crc24_bits) >> 3; + if (enc->op_flags & RTE_BBDEV_TURBO_RATE_MATCH) + out_offset += e >> 3; + else + out_offset += (k >> 3) * 3 + 2; + + /* Update offsets */ + if (seg_total_left == in_length) { + /* Go to the next mbuf */ + m_in = m_in->next; + m_out = m_out->next; + in_offset = 0; + out_offset = 0; + } + r++; + } + + /* check if all input data was processed */ + if (mbuf_total_left != 0) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included CBs sizes"); + } +} + + +static inline void +enqueue_ldpc_enc_one_op(struct turbo_sw_queue *q, struct rte_bbdev_enc_op *op, + struct rte_bbdev_stats *queue_stats) +{ + uint8_t c, r, crc24_bits = 0; + uint32_t e; + struct rte_bbdev_op_ldpc_enc *enc = &op->ldpc_enc; + uint16_t in_offset = enc->input.offset; + uint16_t out_offset = enc->output.offset; + struct rte_mbuf *m_in = enc->input.data; + struct rte_mbuf *m_out = enc->output.data; + struct rte_mbuf *m_out_head = enc->output.data; + uint32_t in_length, mbuf_total_left = enc->input.length; + + uint16_t seg_total_left; + + /* Clear op status */ + op->status = 0; + + if (mbuf_total_left > RTE_BBDEV_TURBO_MAX_TB_SIZE >> 3) { + rte_bbdev_log(ERR, "TB size (%u) is too big, max: %d", + mbuf_total_left, RTE_BBDEV_TURBO_MAX_TB_SIZE); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if (m_in == NULL || m_out == NULL) { + rte_bbdev_log(ERR, 
"Invalid mbuf pointer"); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if ((enc->op_flags & RTE_BBDEV_TURBO_CRC_24B_ATTACH) || + (enc->op_flags & RTE_BBDEV_TURBO_CRC_24A_ATTACH)) + crc24_bits = 24; + + if (enc->code_block_mode == 0) { /* For Transport Block mode */ + c = enc->tb_params.c; + r = enc->tb_params.r; + } else { /* For Code Block mode */ + c = 1; + r = 0; + } + + while (mbuf_total_left > 0 && r < c) { + + seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; + + if (enc->code_block_mode == 0) { + e = (r < enc->tb_params.cab) ? + enc->tb_params.ea : enc->tb_params.eb; + } else { + e = enc->cb_params.e; + } + + process_ldpc_enc_cb(q, op, e, m_in, m_out_head, + m_out, in_offset, out_offset, seg_total_left, + queue_stats); + /* Update total_left */ + in_length = (enc->basegraph == 1 ? 22 : 10) * enc->z_c; + in_length = ((in_length - crc24_bits - enc->n_filler) >> 3); + mbuf_total_left -= in_length; + /* Update offsets for next CBs (if exist) */ + in_offset += in_length; + out_offset += (e + 7) >> 3; + + /* Update offsets */ + if (seg_total_left == in_length) { + /* Go to the next mbuf */ + m_in = m_in->next; + m_out = m_out->next; + in_offset = 0; + out_offset = 0; + } + r++; + } + + /* check if all input data was processed */ + if (mbuf_total_left != 0) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Mismatch between mbuf length and included CBs sizes %d", + mbuf_total_left); + } +} + +static inline uint16_t +enqueue_enc_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_enc_op **ops, + uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) +{ + uint16_t i; +#ifdef RTE_BBDEV_OFFLOAD_COST + queue_stats->acc_offload_cycles = 0; +#endif + + for (i = 0; i < nb_ops; ++i) + enqueue_enc_one_op(q, ops[i], queue_stats); + + return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, + NULL); +} + +static inline uint16_t +enqueue_ldpc_enc_all_ops(struct turbo_sw_queue *q, + struct rte_bbdev_enc_op **ops, + uint16_t 
nb_ops, struct rte_bbdev_stats *queue_stats) +{ + uint16_t i; +#ifdef RTE_BBDEV_OFFLOAD_COST + queue_stats->acc_offload_cycles = 0; +#endif + + for (i = 0; i < nb_ops; ++i) + enqueue_ldpc_enc_one_op(q, ops[i], queue_stats); + + return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, + NULL); +} + +#ifdef RTE_BBDEV_SDK_AVX2 +static inline void +move_padding_bytes(const uint8_t *in, uint8_t *out, uint16_t k, + uint16_t ncb) +{ + uint16_t d = k + 4; + uint16_t kpi = ncb / 3; + uint16_t nd = kpi - d; + + rte_memcpy(&out[nd], in, d); + rte_memcpy(&out[nd + kpi + 64], &in[kpi], d); + rte_memcpy(&out[(nd - 1) + 2 * (kpi + 64)], &in[2 * kpi], d); +} +#endif + +static inline void +process_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, + uint8_t c, uint16_t k, uint16_t kw, struct rte_mbuf *m_in, + struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, + uint16_t in_offset, uint16_t out_offset, bool check_crc_24b, + uint16_t crc24_overlap, uint16_t in_length, + struct rte_bbdev_stats *q_stats) +{ +#ifdef RTE_BBDEV_SDK_AVX2 +#ifdef RTE_LIBRTE_BBDEV_DEBUG + int ret; +#else + RTE_SET_USED(in_length); +#endif + int32_t k_idx; + int32_t iter_cnt; + uint8_t *in, *out, *adapter_input; + int32_t ncb, ncb_without_null; + struct bblib_turbo_adapter_ul_response adapter_resp; + struct bblib_turbo_adapter_ul_request adapter_req; + struct bblib_turbo_decoder_request turbo_req; + struct bblib_turbo_decoder_response turbo_resp; + struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec; +#ifdef RTE_BBDEV_OFFLOAD_COST + uint64_t start_time; +#else + RTE_SET_USED(q_stats); +#endif + + k_idx = compute_idx(k); + +#ifdef RTE_LIBRTE_BBDEV_DEBUG + ret = is_dec_input_valid(k_idx, kw, in_length); + if (ret != 0) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + return; + } +#endif + + in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); + ncb = kw; + ncb_without_null = (k + 4) * 3; + + if (check_bit(dec->op_flags, RTE_BBDEV_TURBO_SUBBLOCK_DEINTERLEAVE)) { + struct 
bblib_deinterleave_ul_request deint_req; + struct bblib_deinterleave_ul_response deint_resp; + + deint_req.circ_buffer = BBLIB_FULL_CIRCULAR_BUFFER; + deint_req.pharqbuffer = in; + deint_req.ncb = ncb; + deint_resp.pinteleavebuffer = q->deint_output; + +#ifdef RTE_BBDEV_OFFLOAD_COST + start_time = rte_rdtsc_precise(); +#endif + /* Sub-block De-Interleaving */ + bblib_deinterleave_ul(&deint_req, &deint_resp); +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + } else + move_padding_bytes(in, q->deint_output, k, ncb); + + adapter_input = q->deint_output; + + if (dec->op_flags & RTE_BBDEV_TURBO_POS_LLR_1_BIT_IN) + adapter_req.isinverted = 1; + else if (dec->op_flags & RTE_BBDEV_TURBO_NEG_LLR_1_BIT_IN) + adapter_req.isinverted = 0; + else { + op->status |= 1 << RTE_BBDEV_DRV_ERROR; + rte_bbdev_log(ERR, "LLR format wasn't specified"); + return; + } + + adapter_req.ncb = ncb_without_null; + adapter_req.pinteleavebuffer = adapter_input; + adapter_resp.pharqout = q->adapter_output; + +#ifdef RTE_BBDEV_OFFLOAD_COST + start_time = rte_rdtsc_precise(); +#endif + /* Turbo decode adaptation */ + bblib_turbo_adapter_ul(&adapter_req, &adapter_resp); +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + + out = (uint8_t *)mbuf_append(m_out_head, m_out, + ((k - crc24_overlap) >> 3)); + if (out == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, "Too little space in output mbuf"); + return; + } + /* rte_bbdev_op_data.offset can be different than the offset of the + * appended bytes + */ + out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); + if (check_crc_24b) + turbo_req.c = c + 1; + else + turbo_req.c = c; + turbo_req.input = (int8_t *)q->adapter_output; + turbo_req.k = k; + turbo_req.k_idx = k_idx; + turbo_req.max_iter_num = dec->iter_max; + turbo_req.early_term_disable = !check_bit(dec->op_flags, + RTE_BBDEV_TURBO_EARLY_TERMINATION); + 
turbo_resp.ag_buf = q->ag; + turbo_resp.cb_buf = q->code_block; + turbo_resp.output = out; + +#ifdef RTE_BBDEV_OFFLOAD_COST + start_time = rte_rdtsc_precise(); +#endif + /* Turbo decode */ + iter_cnt = bblib_turbo_decoder(&turbo_req, &turbo_resp); +#ifdef RTE_BBDEV_OFFLOAD_COST + q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + dec->hard_output.length += (k >> 3); + + if (iter_cnt > 0) { + /* Temporary solution for returned iter_count from SDK */ + iter_cnt = (iter_cnt - 1) >> 1; + dec->iter_count = RTE_MAX(iter_cnt, dec->iter_count); + } else { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, "Turbo Decoder failed"); + return; + } +#else + RTE_SET_USED(q); + RTE_SET_USED(op); + RTE_SET_USED(c); + RTE_SET_USED(k); + RTE_SET_USED(kw); + RTE_SET_USED(m_in); + RTE_SET_USED(m_out_head); + RTE_SET_USED(m_out); + RTE_SET_USED(in_offset); + RTE_SET_USED(out_offset); + RTE_SET_USED(check_crc_24b); + RTE_SET_USED(crc24_overlap); + RTE_SET_USED(in_length); + RTE_SET_USED(q_stats); +#endif +} + +static inline void +process_ldpc_dec_cb(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, + uint8_t c, uint16_t out_length, uint32_t e, + struct rte_mbuf *m_in, + struct rte_mbuf *m_out_head, struct rte_mbuf *m_out, + struct rte_mbuf *m_harq_in, + struct rte_mbuf *m_harq_out_head, struct rte_mbuf *m_harq_out, + uint16_t in_offset, uint16_t out_offset, + uint16_t harq_in_offset, uint16_t harq_out_offset, + bool check_crc_24b, + uint16_t crc24_overlap, uint16_t in_length, + struct rte_bbdev_stats *q_stats) +{ +#ifdef RTE_BBDEV_SDK_AVX512 + RTE_SET_USED(in_length); + RTE_SET_USED(c); + uint8_t *in, *out, *harq_in, *harq_out, *adapter_input; + struct bblib_rate_dematching_5gnr_request derm_req; + struct bblib_rate_dematching_5gnr_response derm_resp; + struct bblib_ldpc_decoder_5gnr_request dec_req; + struct bblib_ldpc_decoder_5gnr_response dec_resp; + struct bblib_crc_request crc_req; + struct bblib_crc_response crc_resp; + struct 
rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; + uint16_t K, parity_offset, sys_cols, outLenWithCrc; + int16_t deRmOutSize, numRows; + + /* Compute some LDPC BG lengths */ + outLenWithCrc = out_length + (crc24_overlap >> 3); + sys_cols = (dec->basegraph == 1) ? 22 : 10; + K = sys_cols * dec->z_c; + parity_offset = K - 2 * dec->z_c; + +#ifdef RTE_BBDEV_OFFLOAD_COST + uint64_t start_time = rte_rdtsc_precise(); +#else + RTE_SET_USED(q_stats); +#endif + + in = rte_pktmbuf_mtod_offset(m_in, uint8_t *, in_offset); + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE)) { + /** + * Single contiguous block from the first LLR of the + * circular buffer. + */ + harq_in = NULL; + if (m_harq_in != NULL) + harq_in = rte_pktmbuf_mtod_offset(m_harq_in, + uint8_t *, harq_in_offset); + if (harq_in == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, "No space in harq input mbuf"); + return; + } + uint16_t harq_in_length = RTE_MIN( + dec->harq_combined_input.length, + (uint32_t) dec->n_cb); + memset(q->ag + harq_in_length, 0, + dec->n_cb - harq_in_length); + rte_memcpy(q->ag, harq_in, harq_in_length); + } + + derm_req.p_in = (int8_t *) in; + derm_req.p_harq = q->ag; /* This doesn't include the filler bits */ + derm_req.base_graph = dec->basegraph; + derm_req.zc = dec->z_c; + derm_req.ncb = dec->n_cb; + derm_req.e = e; + derm_req.k0 = 0; /* Actual output from SDK */ + derm_req.isretx = check_bit(dec->op_flags, + RTE_BBDEV_LDPC_HQ_COMBINE_IN_ENABLE); + derm_req.rvid = dec->rv_index; + derm_req.modulation_order = dec->q_m; + derm_req.start_null_index = parity_offset - dec->n_filler; + derm_req.num_of_null = dec->n_filler; + + bblib_rate_dematching_5gnr(&derm_req, &derm_resp); + + /* Compute RM out size and number of rows */ + deRmOutSize = RTE_MIN( + derm_req.k0 + derm_req.e - + ((derm_req.k0 < derm_req.start_null_index) ? 
+ 0 : dec->n_filler), + dec->n_cb - dec->n_filler); + if (m_harq_in != NULL) + deRmOutSize = RTE_MAX(deRmOutSize, + RTE_MIN(dec->n_cb - dec->n_filler, + m_harq_in->data_len)); + numRows = ((deRmOutSize + dec->n_filler + dec->z_c - 1) / dec->z_c) + - sys_cols + 2; + numRows = RTE_MAX(4, numRows); + + /* get output data starting address */ + out = (uint8_t *)mbuf_append(m_out_head, m_out, out_length); + if (out == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, + "Too little space in LDPC decoder output mbuf"); + return; + } + + /* rte_bbdev_op_data.offset can be different than the offset + * of the appended bytes + */ + out = rte_pktmbuf_mtod_offset(m_out, uint8_t *, out_offset); + adapter_input = q->enc_out; + + dec_req.Zc = dec->z_c; + dec_req.baseGraph = dec->basegraph; + dec_req.nRows = numRows; + dec_req.numChannelLlrs = deRmOutSize; + dec_req.varNodes = derm_req.p_harq; + dec_req.numFillerBits = dec->n_filler; + dec_req.maxIterations = dec->iter_max; + dec_req.enableEarlyTermination = check_bit(dec->op_flags, + RTE_BBDEV_LDPC_ITERATION_STOP_ENABLE); + dec_resp.varNodes = (int16_t *) q->adapter_output; + dec_resp.compactedMessageBytes = q->enc_out; + + bblib_ldpc_decoder_5gnr(&dec_req, &dec_resp); + + dec->iter_count = RTE_MAX(dec_resp.iterationAtTermination, + dec->iter_count); + if (!dec_resp.parityPassedAtTermination) + op->status |= 1 << RTE_BBDEV_SYNDROME_ERROR; + + bblib_bit_reverse((int8_t *) q->enc_out, outLenWithCrc << 3); + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24A_CHECK) || + check_bit(dec->op_flags, + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK)) { + crc_req.data = adapter_input; + crc_req.len = K - dec->n_filler - 24; + crc_resp.check_passed = false; + crc_resp.data = adapter_input; + if (check_crc_24b) + bblib_lte_crc24b_check(&crc_req, &crc_resp); + else + bblib_lte_crc24a_check(&crc_req, &crc_resp); + if (!crc_resp.check_passed) + op->status |= 1 << RTE_BBDEV_CRC_ERROR; + } + +#ifdef RTE_BBDEV_OFFLOAD_COST + 
q_stats->acc_offload_cycles += rte_rdtsc_precise() - start_time; +#endif + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_HQ_COMBINE_OUT_ENABLE)) { + harq_out = NULL; + if (m_harq_out != NULL) { + /* Initialize HARQ data length since we overwrite */ + m_harq_out->data_len = 0; + /* Check there is enough space + * in the HARQ outbound buffer + */ + harq_out = (uint8_t *)mbuf_append(m_harq_out_head, + m_harq_out, deRmOutSize); + } + if (harq_out == NULL) { + op->status |= 1 << RTE_BBDEV_DATA_ERROR; + rte_bbdev_log(ERR, "No space in HARQ output mbuf"); + return; + } + /* get output data starting address and overwrite the data */ + harq_out = rte_pktmbuf_mtod_offset(m_harq_out, uint8_t *, + harq_out_offset); + rte_memcpy(harq_out, derm_req.p_harq, deRmOutSize); + dec->harq_combined_output.length += deRmOutSize; + } + + rte_memcpy(out, adapter_input, out_length); + dec->hard_output.length += out_length; +#else + RTE_SET_USED(q); + RTE_SET_USED(op); + RTE_SET_USED(c); + RTE_SET_USED(out_length); + RTE_SET_USED(e); + RTE_SET_USED(m_in); + RTE_SET_USED(m_out_head); + RTE_SET_USED(m_out); + RTE_SET_USED(m_harq_in); + RTE_SET_USED(m_harq_out_head); + RTE_SET_USED(m_harq_out); + RTE_SET_USED(harq_in_offset); + RTE_SET_USED(harq_out_offset); + RTE_SET_USED(in_offset); + RTE_SET_USED(out_offset); + RTE_SET_USED(check_crc_24b); + RTE_SET_USED(crc24_overlap); + RTE_SET_USED(in_length); + RTE_SET_USED(q_stats); +#endif +} + + +static inline void +enqueue_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, + struct rte_bbdev_stats *queue_stats) +{ + uint8_t c, r = 0; + uint16_t kw, k = 0; + uint16_t crc24_overlap = 0; + struct rte_bbdev_op_turbo_dec *dec = &op->turbo_dec; + struct rte_mbuf *m_in = dec->input.data; + struct rte_mbuf *m_out = dec->hard_output.data; + struct rte_mbuf *m_out_head = dec->hard_output.data; + uint16_t in_offset = dec->input.offset; + uint16_t out_offset = dec->hard_output.offset; + uint32_t mbuf_total_left = dec->input.length; + uint16_t 
seg_total_left; + + /* Clear op status */ + op->status = 0; + + if (m_in == NULL || m_out == NULL) { + rte_bbdev_log(ERR, "Invalid mbuf pointer"); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if (dec->code_block_mode == 0) { /* For Transport Block mode */ + c = dec->tb_params.c; + } else { /* For Code Block mode */ + k = dec->cb_params.k; + c = 1; + } + + if ((c > 1) && !check_bit(dec->op_flags, + RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP)) + crc24_overlap = 24; + + while (mbuf_total_left > 0) { + if (dec->code_block_mode == 0) + k = (r < dec->tb_params.c_neg) ? + dec->tb_params.k_neg : dec->tb_params.k_pos; + + seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; + + /* Calculates circular buffer size (Kw). + * According to 3gpp 36.212 section 5.1.4.2 + * Kw = 3 * Kpi, + * where: + * Kpi = nCol * nRow + * where nCol is 32 and nRow can be calculated from: + * D =< nCol * nRow + * where D is the size of each output from turbo encoder block + * (k + 4). + */ + kw = RTE_ALIGN_CEIL(k + 4, RTE_BBDEV_TURBO_C_SUBBLOCK) * 3; + + process_dec_cb(q, op, c, k, kw, m_in, m_out_head, m_out, + in_offset, out_offset, check_bit(dec->op_flags, + RTE_BBDEV_TURBO_CRC_TYPE_24B), crc24_overlap, + seg_total_left, queue_stats); + + /* To keep CRC24 attached to end of Code block, use + * RTE_BBDEV_TURBO_DEC_TB_CRC_24B_KEEP flag as it + * removed by default once verified. 
+ */ + + mbuf_total_left -= kw; + + /* Update offsets */ + if (seg_total_left == kw) { + /* Go to the next mbuf */ + m_in = m_in->next; + m_out = m_out->next; + in_offset = 0; + out_offset = 0; + } else { + /* Update offsets for next CBs (if exist) */ + in_offset += kw; + out_offset += ((k - crc24_overlap) >> 3); + } + r++; + } +} + +static inline void +enqueue_ldpc_dec_one_op(struct turbo_sw_queue *q, struct rte_bbdev_dec_op *op, + struct rte_bbdev_stats *queue_stats) +{ + uint8_t c, r = 0; + uint32_t e; + uint16_t out_length, crc24_overlap = 0; + struct rte_bbdev_op_ldpc_dec *dec = &op->ldpc_dec; + struct rte_mbuf *m_in = dec->input.data; + struct rte_mbuf *m_harq_in = dec->harq_combined_input.data; + struct rte_mbuf *m_harq_out = dec->harq_combined_output.data; + struct rte_mbuf *m_harq_out_head = dec->harq_combined_output.data; + struct rte_mbuf *m_out = dec->hard_output.data; + struct rte_mbuf *m_out_head = dec->hard_output.data; + uint16_t in_offset = dec->input.offset; + uint16_t harq_in_offset = dec->harq_combined_input.offset; + uint16_t harq_out_offset = dec->harq_combined_output.offset; + uint16_t out_offset = dec->hard_output.offset; + uint32_t mbuf_total_left = dec->input.length; + uint16_t seg_total_left; + + /* Clear op status */ + op->status = 0; + + if (m_in == NULL || m_out == NULL) { + rte_bbdev_log(ERR, "Invalid mbuf pointer"); + op->status = 1 << RTE_BBDEV_DATA_ERROR; + return; + } + + if (dec->code_block_mode == 0) { /* For Transport Block mode */ + c = dec->tb_params.c; + e = dec->tb_params.ea; + } else { /* For Code Block mode */ + c = 1; + e = dec->cb_params.e; + } + + if (check_bit(dec->op_flags, RTE_BBDEV_LDPC_CRC_TYPE_24B_DROP)) + crc24_overlap = 24; + + out_length = (dec->basegraph == 1 ? 22 : 10) * dec->z_c; /* K */ + out_length = ((out_length - crc24_overlap - dec->n_filler) >> 3); + + while (mbuf_total_left > 0) { + if (dec->code_block_mode == 0) + e = (r < dec->tb_params.cab) ? 
+ dec->tb_params.ea : dec->tb_params.eb; + /* Special case handling when overusing mbuf */ + if (e < RTE_BBDEV_LDPC_E_MAX_MBUF) + seg_total_left = rte_pktmbuf_data_len(m_in) - in_offset; + else + seg_total_left = e; + + process_ldpc_dec_cb(q, op, c, out_length, e, + m_in, m_out_head, m_out, + m_harq_in, m_harq_out_head, m_harq_out, + in_offset, out_offset, harq_in_offset, + harq_out_offset, + check_bit(dec->op_flags, + RTE_BBDEV_LDPC_CRC_TYPE_24B_CHECK), + crc24_overlap, + seg_total_left, queue_stats); + + /* To keep CRC24 attached to end of Code block, use + * RTE_BBDEV_LDPC_DEC_TB_CRC_24B_KEEP flag as it + * removed by default once verified. + */ + + mbuf_total_left -= e; + + /* Update offsets */ + if (seg_total_left == e) { + /* Go to the next mbuf */ + m_in = m_in->next; + m_out = m_out->next; + if (m_harq_in != NULL) + m_harq_in = m_harq_in->next; + if (m_harq_out != NULL) + m_harq_out = m_harq_out->next; + in_offset = 0; + out_offset = 0; + harq_in_offset = 0; + harq_out_offset = 0; + } else { + /* Update offsets for next CBs (if exist) */ + in_offset += e; + out_offset += out_length; + } + r++; + } +} + +static inline uint16_t +enqueue_dec_all_ops(struct turbo_sw_queue *q, struct rte_bbdev_dec_op **ops, + uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) +{ + uint16_t i; +#ifdef RTE_BBDEV_OFFLOAD_COST + queue_stats->acc_offload_cycles = 0; +#endif + + for (i = 0; i < nb_ops; ++i) + enqueue_dec_one_op(q, ops[i], queue_stats); + + return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, + NULL); +} + +static inline uint16_t +enqueue_ldpc_dec_all_ops(struct turbo_sw_queue *q, + struct rte_bbdev_dec_op **ops, + uint16_t nb_ops, struct rte_bbdev_stats *queue_stats) +{ + uint16_t i; +#ifdef RTE_BBDEV_OFFLOAD_COST + queue_stats->acc_offload_cycles = 0; +#endif + + for (i = 0; i < nb_ops; ++i) + enqueue_ldpc_dec_one_op(q, ops[i], queue_stats); + + return rte_ring_enqueue_burst(q->processed_pkts, (void **)ops, nb_ops, + NULL); +} + +/* Enqueue burst 
*/ +static uint16_t +enqueue_enc_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t nb_ops) +{ + void *queue = q_data->queue_private; + struct turbo_sw_queue *q = queue; + uint16_t nb_enqueued = 0; + + nb_enqueued = enqueue_enc_all_ops(q, ops, nb_ops, &q_data->queue_stats); + + q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; + q_data->queue_stats.enqueued_count += nb_enqueued; + + return nb_enqueued; +} + +/* Enqueue burst */ +static uint16_t +enqueue_ldpc_enc_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t nb_ops) +{ + void *queue = q_data->queue_private; + struct turbo_sw_queue *q = queue; + uint16_t nb_enqueued = 0; + + nb_enqueued = enqueue_ldpc_enc_all_ops( + q, ops, nb_ops, &q_data->queue_stats); + + q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; + q_data->queue_stats.enqueued_count += nb_enqueued; + + return nb_enqueued; +} + +/* Enqueue burst */ +static uint16_t +enqueue_dec_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t nb_ops) +{ + void *queue = q_data->queue_private; + struct turbo_sw_queue *q = queue; + uint16_t nb_enqueued = 0; + + nb_enqueued = enqueue_dec_all_ops(q, ops, nb_ops, &q_data->queue_stats); + + q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; + q_data->queue_stats.enqueued_count += nb_enqueued; + + return nb_enqueued; +} + +/* Enqueue burst */ +static uint16_t +enqueue_ldpc_dec_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_dec_op **ops, uint16_t nb_ops) +{ + void *queue = q_data->queue_private; + struct turbo_sw_queue *q = queue; + uint16_t nb_enqueued = 0; + + nb_enqueued = enqueue_ldpc_dec_all_ops(q, ops, nb_ops, + &q_data->queue_stats); + + q_data->queue_stats.enqueue_err_count += nb_ops - nb_enqueued; + q_data->queue_stats.enqueued_count += nb_enqueued; + + return nb_enqueued; +} + +/* Dequeue decode burst */ +static uint16_t +dequeue_dec_ops(struct rte_bbdev_queue_data 
*q_data, + struct rte_bbdev_dec_op **ops, uint16_t nb_ops) +{ + struct turbo_sw_queue *q = q_data->queue_private; + uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts, + (void **)ops, nb_ops, NULL); + q_data->queue_stats.dequeued_count += nb_dequeued; + + return nb_dequeued; +} + +/* Dequeue encode burst */ +static uint16_t +dequeue_enc_ops(struct rte_bbdev_queue_data *q_data, + struct rte_bbdev_enc_op **ops, uint16_t nb_ops) +{ + struct turbo_sw_queue *q = q_data->queue_private; + uint16_t nb_dequeued = rte_ring_dequeue_burst(q->processed_pkts, + (void **)ops, nb_ops, NULL); + q_data->queue_stats.dequeued_count += nb_dequeued; + + return nb_dequeued; +} + +/* Parse 16bit integer from string argument */ +static inline int +parse_u16_arg(const char *key, const char *value, void *extra_args) +{ + uint16_t *u16 = extra_args; + unsigned int long result; + + if ((value == NULL) || (extra_args == NULL)) + return -EINVAL; + errno = 0; + result = strtoul(value, NULL, 0); + if ((result >= (1 << 16)) || (errno != 0)) { + rte_bbdev_log(ERR, "Invalid value %lu for %s", result, key); + return -ERANGE; + } + *u16 = (uint16_t)result; + return 0; +} + +/* Parse parameters used to create device */ +static int +parse_turbo_sw_params(struct turbo_sw_params *params, const char *input_args) +{ + struct rte_kvargs *kvlist = NULL; + int ret = 0; + + if (params == NULL) + return -EINVAL; + if (input_args) { + kvlist = rte_kvargs_parse(input_args, turbo_sw_valid_params); + if (kvlist == NULL) + return -EFAULT; + + ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[0], + &parse_u16_arg, ¶ms->queues_num); + if (ret < 0) + goto exit; + + ret = rte_kvargs_process(kvlist, turbo_sw_valid_params[1], + &parse_u16_arg, ¶ms->socket_id); + if (ret < 0) + goto exit; + + if (params->socket_id >= RTE_MAX_NUMA_NODES) { + rte_bbdev_log(ERR, "Invalid socket, must be < %u", + RTE_MAX_NUMA_NODES); + goto exit; + } + } + +exit: + if (kvlist) + rte_kvargs_free(kvlist); + return ret; +} + +/* 
Create device */ +static int +turbo_sw_bbdev_create(struct rte_vdev_device *vdev, + struct turbo_sw_params *init_params) +{ + struct rte_bbdev *bbdev; + const char *name = rte_vdev_device_name(vdev); + + bbdev = rte_bbdev_allocate(name); + if (bbdev == NULL) + return -ENODEV; + + bbdev->data->dev_private = rte_zmalloc_socket(name, + sizeof(struct bbdev_private), RTE_CACHE_LINE_SIZE, + init_params->socket_id); + if (bbdev->data->dev_private == NULL) { + rte_bbdev_release(bbdev); + return -ENOMEM; + } + + bbdev->dev_ops = &pmd_ops; + bbdev->device = &vdev->device; + bbdev->data->socket_id = init_params->socket_id; + bbdev->intr_handle = NULL; + + /* register rx/tx burst functions for data path */ + bbdev->dequeue_enc_ops = dequeue_enc_ops; + bbdev->dequeue_dec_ops = dequeue_dec_ops; + bbdev->enqueue_enc_ops = enqueue_enc_ops; + bbdev->enqueue_dec_ops = enqueue_dec_ops; + bbdev->dequeue_ldpc_enc_ops = dequeue_enc_ops; + bbdev->dequeue_ldpc_dec_ops = dequeue_dec_ops; + bbdev->enqueue_ldpc_enc_ops = enqueue_ldpc_enc_ops; + bbdev->enqueue_ldpc_dec_ops = enqueue_ldpc_dec_ops; + ((struct bbdev_private *) bbdev->data->dev_private)->max_nb_queues = + init_params->queues_num; + + return 0; +} + +/* Initialise device */ +static int +turbo_sw_bbdev_probe(struct rte_vdev_device *vdev) +{ + struct turbo_sw_params init_params = { + rte_socket_id(), + RTE_BBDEV_DEFAULT_MAX_NB_QUEUES + }; + const char *name; + const char *input_args; + + if (vdev == NULL) + return -EINVAL; + + name = rte_vdev_device_name(vdev); + if (name == NULL) + return -EINVAL; + input_args = rte_vdev_device_args(vdev); + parse_turbo_sw_params(&init_params, input_args); + + rte_bbdev_log_debug( + "Initialising %s on NUMA node %d with max queues: %d\n", + name, init_params.socket_id, init_params.queues_num); + + return turbo_sw_bbdev_create(vdev, &init_params); +} + +/* Uninitialise device */ +static int +turbo_sw_bbdev_remove(struct rte_vdev_device *vdev) +{ + struct rte_bbdev *bbdev; + const char *name; + + if 
(vdev == NULL) + return -EINVAL; + + name = rte_vdev_device_name(vdev); + if (name == NULL) + return -EINVAL; + + bbdev = rte_bbdev_get_named_dev(name); + if (bbdev == NULL) + return -EINVAL; + + rte_free(bbdev->data->dev_private); + + return rte_bbdev_release(bbdev); +} + +static struct rte_vdev_driver bbdev_turbo_sw_pmd_drv = { + .probe = turbo_sw_bbdev_probe, + .remove = turbo_sw_bbdev_remove +}; + +RTE_PMD_REGISTER_VDEV(DRIVER_NAME, bbdev_turbo_sw_pmd_drv); +RTE_PMD_REGISTER_PARAM_STRING(DRIVER_NAME, + TURBO_SW_MAX_NB_QUEUES_ARG"=<int> " + TURBO_SW_SOCKET_ID_ARG"=<int>"); +RTE_PMD_REGISTER_ALIAS(DRIVER_NAME, turbo_sw); + +RTE_INIT(turbo_sw_bbdev_init_log) +{ + bbdev_turbo_sw_logtype = rte_log_register("pmd.bb.turbo_sw"); + if (bbdev_turbo_sw_logtype >= 0) + rte_log_set_level(bbdev_turbo_sw_logtype, RTE_LOG_NOTICE); +} diff --git a/src/spdk/dpdk/drivers/baseband/turbo_sw/meson.build b/src/spdk/dpdk/drivers/baseband/turbo_sw/meson.build new file mode 100644 index 000000000..f5a1ab3fc --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/turbo_sw/meson.build @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2019 Intel Corporation + +path = get_option('flexran_sdk') + +if dpdk_conf.has('RTE_BBDEV_SDK_AVX2') + lib = cc.find_library('libturbo', dirs: [path + '/lib_turbo'], required: false) + if not lib.found() + build = false + reason = 'missing dependency, "libturbo"' + else + ext_deps += cc.find_library('libturbo', dirs: [path + '/lib_turbo'], required: true) + ext_deps += cc.find_library('libcrc', dirs: [path + '/lib_crc'], required: true) + ext_deps += cc.find_library('librate_matching', dirs: [path + '/lib_rate_matching'], required: true) + ext_deps += cc.find_library('libcommon', dirs: [path + '/lib_common'], required: true) + ext_deps += cc.find_library('libstdc++', required: true) + ext_deps += cc.find_library('libirc', required: true) + ext_deps += cc.find_library('libimf', required: true) + ext_deps += cc.find_library('libipps', 
required: true) + ext_deps += cc.find_library('libsvml', required: true) + includes += include_directories(path + '/lib_turbo') + includes += include_directories(path + '/lib_crc') + includes += include_directories(path + '/lib_rate_matching') + includes += include_directories(path + '/lib_common') + endif +endif +if dpdk_conf.has('RTE_BBDEV_SDK_AVX512') + ext_deps += cc.find_library('libldpc_encoder_5gnr', dirs: [path + '/lib_ldpc_encoder_5gnr'], required: true) + ext_deps += cc.find_library('libldpc_decoder_5gnr', dirs: [path + '/lib_ldpc_decoder_5gnr'], required: true) + ext_deps += cc.find_library('libLDPC_ratematch_5gnr', dirs: [path + '/lib_LDPC_ratematch_5gnr'], required: true) + ext_deps += cc.find_library('librate_dematching_5gnr', dirs: [path + '/lib_rate_dematching_5gnr'], required: true) + includes += include_directories(path + '/lib_ldpc_encoder_5gnr') + includes += include_directories(path + '/lib_ldpc_decoder_5gnr') + includes += include_directories(path + '/lib_LDPC_ratematch_5gnr') + includes += include_directories(path + '/lib_rate_dematching_5gnr') +endif + +deps += ['bbdev', 'bus_vdev', 'ring'] +sources = files('bbdev_turbo_software.c') diff --git a/src/spdk/dpdk/drivers/baseband/turbo_sw/rte_pmd_bbdev_turbo_sw_version.map b/src/spdk/dpdk/drivers/baseband/turbo_sw/rte_pmd_bbdev_turbo_sw_version.map new file mode 100644 index 000000000..f9f17e4f6 --- /dev/null +++ b/src/spdk/dpdk/drivers/baseband/turbo_sw/rte_pmd_bbdev_turbo_sw_version.map @@ -0,0 +1,3 @@ +DPDK_20.0 { + local: *; +}; |