Diffstat (limited to 'drivers/crypto/intel')
42 files changed, 6056 insertions, 535 deletions
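The heart of this patch is the acomp algorithm it registers, with cra_name "deflate" and cra_driver_name "deflate-iaa". As a minimal consumer sketch (not part of the patch; the function name and the synchronous crypto_wait_req() usage are illustrative), kernel code would drive it through the standard acomp API:

#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <crypto/acompress.h>

static int iaa_compress_example(void *src, unsigned int slen,
				void *dst, unsigned int dlen)
{
	struct scatterlist sg_src, sg_dst;
	struct crypto_acomp *tfm;
	struct acomp_req *req;
	DECLARE_CRYPTO_WAIT(wait);
	int ret;

	/* "deflate" resolves to deflate-iaa when its priority (300) wins */
	tfm = crypto_alloc_acomp("deflate", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	req = acomp_request_alloc(tfm);
	if (!req) {
		crypto_free_acomp(tfm);
		return -ENOMEM;
	}

	sg_init_one(&sg_src, src, slen);
	sg_init_one(&sg_dst, dst, dlen);
	acomp_request_set_params(req, &sg_src, &sg_dst, slen, dlen);
	acomp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				   crypto_req_done, &wait);

	/* the driver returns -EINPROGRESS in its async modes; wait it out */
	ret = crypto_wait_req(crypto_acomp_compress(req), &wait);

	acomp_request_free(req);
	crypto_free_acomp(tfm);
	return ret;
}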
diff --git a/drivers/crypto/intel/Kconfig b/drivers/crypto/intel/Kconfig index 3d90c87d40..f38cd62a3f 100644 --- a/drivers/crypto/intel/Kconfig +++ b/drivers/crypto/intel/Kconfig @@ -3,3 +3,4 @@ source "drivers/crypto/intel/keembay/Kconfig" source "drivers/crypto/intel/ixp4xx/Kconfig" source "drivers/crypto/intel/qat/Kconfig" +source "drivers/crypto/intel/iaa/Kconfig" diff --git a/drivers/crypto/intel/Makefile b/drivers/crypto/intel/Makefile index b3d0352ae1..2f56f6d34c 100644 --- a/drivers/crypto/intel/Makefile +++ b/drivers/crypto/intel/Makefile @@ -3,3 +3,4 @@ obj-y += keembay/ obj-y += ixp4xx/ obj-$(CONFIG_CRYPTO_DEV_QAT) += qat/ +obj-$(CONFIG_CRYPTO_DEV_IAA_CRYPTO) += iaa/ diff --git a/drivers/crypto/intel/iaa/Kconfig b/drivers/crypto/intel/iaa/Kconfig new file mode 100644 index 0000000000..d53f4b1d49 --- /dev/null +++ b/drivers/crypto/intel/iaa/Kconfig @@ -0,0 +1,19 @@ +config CRYPTO_DEV_IAA_CRYPTO + tristate "Support for Intel(R) IAA Compression Accelerator" + depends on CRYPTO_DEFLATE + depends on INTEL_IDXD + default n + help + This driver supports acceleration for compression and + decompression with the Intel Analytics Accelerator (IAA) + hardware using the cryptographic API. If you choose 'M' + here, the module will be called iaa_crypto. + +config CRYPTO_DEV_IAA_CRYPTO_STATS + bool "Enable Intel(R) IAA Compression Accelerator Statistics" + depends on CRYPTO_DEV_IAA_CRYPTO + default n + help + Enable statistics for the IAA compression accelerator. + These include per-device and per-workqueue statistics in + addition to global driver statistics. diff --git a/drivers/crypto/intel/iaa/Makefile b/drivers/crypto/intel/iaa/Makefile new file mode 100644 index 0000000000..b64b208d23 --- /dev/null +++ b/drivers/crypto/intel/iaa/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for IAA crypto device drivers +# + +ccflags-y += -I $(srctree)/drivers/dma/idxd -DDEFAULT_SYMBOL_NAMESPACE=IDXD + +obj-$(CONFIG_CRYPTO_DEV_IAA_CRYPTO) := iaa_crypto.o + +iaa_crypto-y := iaa_crypto_main.o iaa_crypto_comp_fixed.o + +iaa_crypto-$(CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS) += iaa_crypto_stats.o diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h new file mode 100644 index 0000000000..014420f7be --- /dev/null +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -0,0 +1,173 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. 
*/ + +#ifndef __IAA_CRYPTO_H__ +#define __IAA_CRYPTO_H__ + +#include <linux/crypto.h> +#include <linux/idxd.h> +#include <uapi/linux/idxd.h> + +#define IDXD_SUBDRIVER_NAME "crypto" + +#define IAA_DECOMP_ENABLE BIT(0) +#define IAA_DECOMP_FLUSH_OUTPUT BIT(1) +#define IAA_DECOMP_CHECK_FOR_EOB BIT(2) +#define IAA_DECOMP_STOP_ON_EOB BIT(3) +#define IAA_DECOMP_SUPPRESS_OUTPUT BIT(9) + +#define IAA_COMP_FLUSH_OUTPUT BIT(1) +#define IAA_COMP_APPEND_EOB BIT(2) + +#define IAA_COMPLETION_TIMEOUT 1000000 + +#define IAA_ANALYTICS_ERROR 0x0a +#define IAA_ERROR_DECOMP_BUF_OVERFLOW 0x0b +#define IAA_ERROR_COMP_BUF_OVERFLOW 0x19 +#define IAA_ERROR_WATCHDOG_EXPIRED 0x24 + +#define IAA_COMP_MODES_MAX 2 + +#define FIXED_HDR 0x2 +#define FIXED_HDR_SIZE 3 + +#define IAA_COMP_FLAGS (IAA_COMP_FLUSH_OUTPUT | \ + IAA_COMP_APPEND_EOB) + +#define IAA_DECOMP_FLAGS (IAA_DECOMP_ENABLE | \ + IAA_DECOMP_FLUSH_OUTPUT | \ + IAA_DECOMP_CHECK_FOR_EOB | \ + IAA_DECOMP_STOP_ON_EOB) + +/* Representation of IAA workqueue */ +struct iaa_wq { + struct list_head list; + + struct idxd_wq *wq; + int ref; + bool remove; + + struct iaa_device *iaa_device; + + u64 comp_calls; + u64 comp_bytes; + u64 decomp_calls; + u64 decomp_bytes; +}; + +struct iaa_device_compression_mode { + const char *name; + + struct aecs_comp_table_record *aecs_comp_table; + struct aecs_decomp_table_record *aecs_decomp_table; + + dma_addr_t aecs_comp_table_dma_addr; + dma_addr_t aecs_decomp_table_dma_addr; +}; + +/* Representation of IAA device with wqs, populated by probe */ +struct iaa_device { + struct list_head list; + struct idxd_device *idxd; + + struct iaa_device_compression_mode *compression_modes[IAA_COMP_MODES_MAX]; + + int n_wq; + struct list_head wqs; + + u64 comp_calls; + u64 comp_bytes; + u64 decomp_calls; + u64 decomp_bytes; +}; + +struct wq_table_entry { + struct idxd_wq **wqs; + int max_wqs; + int n_wqs; + int cur_wq; +}; + +#define IAA_AECS_ALIGN 32 + +/* + * Analytics Engine Configuration and State (AECS) contains parameters and + * internal state of the analytics engine. 
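+ * The comp and decomp table records below are DMA-mapped per device at
+ * probe time and handed to the hardware through the descriptor's src2
+ * field.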
+ */ +struct aecs_comp_table_record { + u32 crc; + u32 xor_checksum; + u32 reserved0[5]; + u32 num_output_accum_bits; + u8 output_accum[256]; + u32 ll_sym[286]; + u32 reserved1; + u32 reserved2; + u32 d_sym[30]; + u32 reserved_padding[2]; +} __packed; + +/* AECS for decompress */ +struct aecs_decomp_table_record { + u32 crc; + u32 xor_checksum; + u32 low_filter_param; + u32 high_filter_param; + u32 output_mod_idx; + u32 drop_init_decomp_out_bytes; + u32 reserved[36]; + u32 output_accum_data[2]; + u32 out_bits_valid; + u32 bit_off_indexing; + u32 input_accum_data[64]; + u8 size_qw[32]; + u32 decomp_state[1220]; +} __packed; + +int iaa_aecs_init_fixed(void); +void iaa_aecs_cleanup_fixed(void); + +typedef int (*iaa_dev_comp_init_fn_t) (struct iaa_device_compression_mode *mode); +typedef int (*iaa_dev_comp_free_fn_t) (struct iaa_device_compression_mode *mode); + +struct iaa_compression_mode { + const char *name; + u32 *ll_table; + int ll_table_size; + u32 *d_table; + int d_table_size; + u32 *header_table; + int header_table_size; + u16 gen_decomp_table_flags; + iaa_dev_comp_init_fn_t init; + iaa_dev_comp_free_fn_t free; +}; + +int add_iaa_compression_mode(const char *name, + const u32 *ll_table, + int ll_table_size, + const u32 *d_table, + int d_table_size, + const u8 *header_table, + int header_table_size, + u16 gen_decomp_table_flags, + iaa_dev_comp_init_fn_t init, + iaa_dev_comp_free_fn_t free); + +void remove_iaa_compression_mode(const char *name); + +enum iaa_mode { + IAA_MODE_FIXED, +}; + +struct iaa_compression_ctx { + enum iaa_mode mode; + bool verify_compress; + bool async_mode; + bool use_irq; +}; + +extern struct list_head iaa_devices; +extern struct mutex iaa_devices_lock; + +#endif diff --git a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c new file mode 100644 index 0000000000..45cf5d74f0 --- /dev/null +++ b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ + +#include "idxd.h" +#include "iaa_crypto.h" + +/* + * Fixed Huffman tables the IAA hardware requires to implement RFC-1951. 
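+ * Each table entry encodes one symbol as (code length << 15) | code;
+ * e.g. 0x40030 is the 8-bit fixed code 0x30 for literal 0.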
+ */ +static const u32 fixed_ll_sym[286] = { + 0x40030, 0x40031, 0x40032, 0x40033, 0x40034, 0x40035, 0x40036, 0x40037, + 0x40038, 0x40039, 0x4003A, 0x4003B, 0x4003C, 0x4003D, 0x4003E, 0x4003F, + 0x40040, 0x40041, 0x40042, 0x40043, 0x40044, 0x40045, 0x40046, 0x40047, + 0x40048, 0x40049, 0x4004A, 0x4004B, 0x4004C, 0x4004D, 0x4004E, 0x4004F, + 0x40050, 0x40051, 0x40052, 0x40053, 0x40054, 0x40055, 0x40056, 0x40057, + 0x40058, 0x40059, 0x4005A, 0x4005B, 0x4005C, 0x4005D, 0x4005E, 0x4005F, + 0x40060, 0x40061, 0x40062, 0x40063, 0x40064, 0x40065, 0x40066, 0x40067, + 0x40068, 0x40069, 0x4006A, 0x4006B, 0x4006C, 0x4006D, 0x4006E, 0x4006F, + 0x40070, 0x40071, 0x40072, 0x40073, 0x40074, 0x40075, 0x40076, 0x40077, + 0x40078, 0x40079, 0x4007A, 0x4007B, 0x4007C, 0x4007D, 0x4007E, 0x4007F, + 0x40080, 0x40081, 0x40082, 0x40083, 0x40084, 0x40085, 0x40086, 0x40087, + 0x40088, 0x40089, 0x4008A, 0x4008B, 0x4008C, 0x4008D, 0x4008E, 0x4008F, + 0x40090, 0x40091, 0x40092, 0x40093, 0x40094, 0x40095, 0x40096, 0x40097, + 0x40098, 0x40099, 0x4009A, 0x4009B, 0x4009C, 0x4009D, 0x4009E, 0x4009F, + 0x400A0, 0x400A1, 0x400A2, 0x400A3, 0x400A4, 0x400A5, 0x400A6, 0x400A7, + 0x400A8, 0x400A9, 0x400AA, 0x400AB, 0x400AC, 0x400AD, 0x400AE, 0x400AF, + 0x400B0, 0x400B1, 0x400B2, 0x400B3, 0x400B4, 0x400B5, 0x400B6, 0x400B7, + 0x400B8, 0x400B9, 0x400BA, 0x400BB, 0x400BC, 0x400BD, 0x400BE, 0x400BF, + 0x48190, 0x48191, 0x48192, 0x48193, 0x48194, 0x48195, 0x48196, 0x48197, + 0x48198, 0x48199, 0x4819A, 0x4819B, 0x4819C, 0x4819D, 0x4819E, 0x4819F, + 0x481A0, 0x481A1, 0x481A2, 0x481A3, 0x481A4, 0x481A5, 0x481A6, 0x481A7, + 0x481A8, 0x481A9, 0x481AA, 0x481AB, 0x481AC, 0x481AD, 0x481AE, 0x481AF, + 0x481B0, 0x481B1, 0x481B2, 0x481B3, 0x481B4, 0x481B5, 0x481B6, 0x481B7, + 0x481B8, 0x481B9, 0x481BA, 0x481BB, 0x481BC, 0x481BD, 0x481BE, 0x481BF, + 0x481C0, 0x481C1, 0x481C2, 0x481C3, 0x481C4, 0x481C5, 0x481C6, 0x481C7, + 0x481C8, 0x481C9, 0x481CA, 0x481CB, 0x481CC, 0x481CD, 0x481CE, 0x481CF, + 0x481D0, 0x481D1, 0x481D2, 0x481D3, 0x481D4, 0x481D5, 0x481D6, 0x481D7, + 0x481D8, 0x481D9, 0x481DA, 0x481DB, 0x481DC, 0x481DD, 0x481DE, 0x481DF, + 0x481E0, 0x481E1, 0x481E2, 0x481E3, 0x481E4, 0x481E5, 0x481E6, 0x481E7, + 0x481E8, 0x481E9, 0x481EA, 0x481EB, 0x481EC, 0x481ED, 0x481EE, 0x481EF, + 0x481F0, 0x481F1, 0x481F2, 0x481F3, 0x481F4, 0x481F5, 0x481F6, 0x481F7, + 0x481F8, 0x481F9, 0x481FA, 0x481FB, 0x481FC, 0x481FD, 0x481FE, 0x481FF, + 0x38000, 0x38001, 0x38002, 0x38003, 0x38004, 0x38005, 0x38006, 0x38007, + 0x38008, 0x38009, 0x3800A, 0x3800B, 0x3800C, 0x3800D, 0x3800E, 0x3800F, + 0x38010, 0x38011, 0x38012, 0x38013, 0x38014, 0x38015, 0x38016, 0x38017, + 0x400C0, 0x400C1, 0x400C2, 0x400C3, 0x400C4, 0x400C5 +}; + +static const u32 fixed_d_sym[30] = { + 0x28000, 0x28001, 0x28002, 0x28003, 0x28004, 0x28005, 0x28006, 0x28007, + 0x28008, 0x28009, 0x2800A, 0x2800B, 0x2800C, 0x2800D, 0x2800E, 0x2800F, + 0x28010, 0x28011, 0x28012, 0x28013, 0x28014, 0x28015, 0x28016, 0x28017, + 0x28018, 0x28019, 0x2801A, 0x2801B, 0x2801C, 0x2801D +}; + +static int init_fixed_mode(struct iaa_device_compression_mode *mode) +{ + struct aecs_comp_table_record *comp_table = mode->aecs_comp_table; + u32 bfinal = 1; + u32 offset; + + /* Configure aecs table using fixed Huffman table */ + comp_table->crc = 0; + comp_table->xor_checksum = 0; + offset = comp_table->num_output_accum_bits / 8; + comp_table->output_accum[offset] = FIXED_HDR | bfinal; + comp_table->num_output_accum_bits = FIXED_HDR_SIZE; + + return 0; +} + +int iaa_aecs_init_fixed(void) +{ + int ret; + + ret = 
add_iaa_compression_mode("fixed", + fixed_ll_sym, + sizeof(fixed_ll_sym), + fixed_d_sym, + sizeof(fixed_d_sym), + NULL, 0, 0, + init_fixed_mode, NULL); + if (!ret) + pr_debug("IAA fixed compression mode initialized\n"); + + return ret; +} + +void iaa_aecs_cleanup_fixed(void) +{ + remove_iaa_compression_mode("fixed"); +} diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c new file mode 100644 index 0000000000..64a2e87a55 --- /dev/null +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -0,0 +1,2197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/device.h> +#include <linux/iommu.h> +#include <uapi/linux/idxd.h> +#include <linux/highmem.h> +#include <linux/sched/smt.h> +#include <crypto/internal/acompress.h> + +#include "idxd.h" +#include "iaa_crypto.h" +#include "iaa_crypto_stats.h" + +#ifdef pr_fmt +#undef pr_fmt +#endif + +#define pr_fmt(fmt) "idxd: " IDXD_SUBDRIVER_NAME ": " fmt + +#define IAA_ALG_PRIORITY 300 + +/* number of iaa instances probed */ +static unsigned int nr_iaa; +static unsigned int nr_cpus; +static unsigned int nr_nodes; +static unsigned int nr_cpus_per_node; + +/* Number of physical cpus sharing each iaa instance */ +static unsigned int cpus_per_iaa; + +static struct crypto_comp *deflate_generic_tfm; + +/* Per-cpu lookup table for balanced wqs */ +static struct wq_table_entry __percpu *wq_table; + +static struct idxd_wq *wq_table_next_wq(int cpu) +{ + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + + if (++entry->cur_wq >= entry->n_wqs) + entry->cur_wq = 0; + + if (!entry->wqs[entry->cur_wq]) + return NULL; + + pr_debug("%s: returning wq at idx %d (iaa wq %d.%d) from cpu %d\n", __func__, + entry->cur_wq, entry->wqs[entry->cur_wq]->idxd->id, + entry->wqs[entry->cur_wq]->id, cpu); + + return entry->wqs[entry->cur_wq]; +} + +static void wq_table_add(int cpu, struct idxd_wq *wq) +{ + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + + if (WARN_ON(entry->n_wqs == entry->max_wqs)) + return; + + entry->wqs[entry->n_wqs++] = wq; + + pr_debug("%s: added iaa wq %d.%d to idx %d of cpu %d\n", __func__, + entry->wqs[entry->n_wqs - 1]->idxd->id, + entry->wqs[entry->n_wqs - 1]->id, entry->n_wqs - 1, cpu); +} + +static void wq_table_free_entry(int cpu) +{ + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + + kfree(entry->wqs); + memset(entry, 0, sizeof(*entry)); +} + +static void wq_table_clear_entry(int cpu) +{ + struct wq_table_entry *entry = per_cpu_ptr(wq_table, cpu); + + entry->n_wqs = 0; + entry->cur_wq = 0; + memset(entry->wqs, 0, entry->max_wqs * sizeof(struct idxd_wq *)); +} + +LIST_HEAD(iaa_devices); +DEFINE_MUTEX(iaa_devices_lock); + +/* If enabled, IAA hw crypto algos are registered, unavailable otherwise */ +static bool iaa_crypto_enabled; +static bool iaa_crypto_registered; + +/* Verify results of IAA compress or not */ +static bool iaa_verify_compress = true; + +static ssize_t verify_compress_show(struct device_driver *driver, char *buf) +{ + return sprintf(buf, "%d\n", iaa_verify_compress); +} + +static ssize_t verify_compress_store(struct device_driver *driver, + const char *buf, size_t count) +{ + int ret = -EBUSY; + + mutex_lock(&iaa_devices_lock); + + if (iaa_crypto_enabled) + goto out; + + ret = kstrtobool(buf, &iaa_verify_compress); + if (ret) + goto out; + + ret = count; +out: + 
mutex_unlock(&iaa_devices_lock); + + return ret; +} +static DRIVER_ATTR_RW(verify_compress); + +/* + * The iaa crypto driver supports three 'sync' methods determining how + * compressions and decompressions are performed: + * + * - sync: the compression or decompression completes before + * returning. This is the mode used by the async crypto + * interface when the sync mode is set to 'sync' and by + * the sync crypto interface regardless of setting. + * + * - async: the compression or decompression is submitted and returns + * immediately. Completion interrupts are not used so + * the caller is responsible for polling the descriptor + * for completion. This mode is applicable to only the + * async crypto interface and is ignored for anything + * else. + * + * - async_irq: the compression or decompression is submitted and + * returns immediately. Completion interrupts are + * enabled so the caller can wait for the completion and + * yield to other threads. When the compression or + * decompression completes, the completion is signaled + * and the caller awakened. This mode is applicable to + * only the async crypto interface and is ignored for + * anything else. + * + * These modes can be set using the iaa_crypto sync_mode driver + * attribute. + */ + +/* Use async mode */ +static bool async_mode; +/* Use interrupts */ +static bool use_irq; + +/** + * set_iaa_sync_mode - Set IAA sync mode + * @name: The name of the sync mode + * + * Make the IAA sync mode named @name the current sync mode used by + * compression/decompression. + */ + +static int set_iaa_sync_mode(const char *name) +{ + int ret = 0; + + if (sysfs_streq(name, "sync")) { + async_mode = false; + use_irq = false; + } else if (sysfs_streq(name, "async")) { + async_mode = true; + use_irq = false; + } else if (sysfs_streq(name, "async_irq")) { + async_mode = true; + use_irq = true; + } else { + ret = -EINVAL; + } + + return ret; +} + +static ssize_t sync_mode_show(struct device_driver *driver, char *buf) +{ + int ret = 0; + + if (!async_mode && !use_irq) + ret = sprintf(buf, "%s\n", "sync"); + else if (async_mode && !use_irq) + ret = sprintf(buf, "%s\n", "async"); + else if (async_mode && use_irq) + ret = sprintf(buf, "%s\n", "async_irq"); + + return ret; +} + +static ssize_t sync_mode_store(struct device_driver *driver, + const char *buf, size_t count) +{ + int ret = -EBUSY; + + mutex_lock(&iaa_devices_lock); + + if (iaa_crypto_enabled) + goto out; + + ret = set_iaa_sync_mode(buf); + if (ret == 0) + ret = count; +out: + mutex_unlock(&iaa_devices_lock); + + return ret; +} +static DRIVER_ATTR_RW(sync_mode); + +static struct iaa_compression_mode *iaa_compression_modes[IAA_COMP_MODES_MAX]; + +static int find_empty_iaa_compression_mode(void) +{ + int i = -EINVAL; + + for (i = 0; i < IAA_COMP_MODES_MAX; i++) { + if (iaa_compression_modes[i]) + continue; + break; + } + + return i; +} + +static struct iaa_compression_mode *find_iaa_compression_mode(const char *name, int *idx) +{ + struct iaa_compression_mode *mode; + int i; + + for (i = 0; i < IAA_COMP_MODES_MAX; i++) { + mode = iaa_compression_modes[i]; + if (!mode) + continue; + + if (!strcmp(mode->name, name)) { + *idx = i; + return iaa_compression_modes[i]; + } + } + + return NULL; +} + +static void free_iaa_compression_mode(struct iaa_compression_mode *mode) +{ + kfree(mode->name); + kfree(mode->ll_table); + kfree(mode->d_table); + kfree(mode->header_table); + + kfree(mode); +} + +/* + * IAA Compression modes are defined by an ll_table, a d_table, and an + * optional header_table. 
These tables are typically generated and
+ * captured using statistics collected from running actual
+ * compress/decompress workloads.
+ *
+ * A module or other kernel code can add and remove compression modes
+ * with a given name using the exported @add_iaa_compression_mode()
+ * and @remove_iaa_compression_mode() functions.
+ *
+ * When a new compression mode is added, the tables are saved in a
+ * global compression mode list. When IAA devices are added, a
+ * per-IAA device dma mapping is created for each IAA device, for each
+ * compression mode. These are the tables used to do the actual
+ * compression/decompression and are unmapped if/when the devices are
+ * removed. Currently, compression modes must be added before any
+ * device is added, and removed after all devices have been removed.
+ */
+
+/**
+ * remove_iaa_compression_mode - Remove an IAA compression mode
+ * @name: The name the compression mode will be known as
+ *
+ * Remove the IAA compression mode named @name.
+ */
+void remove_iaa_compression_mode(const char *name)
+{
+ struct iaa_compression_mode *mode;
+ int idx;
+
+ mutex_lock(&iaa_devices_lock);
+
+ if (!list_empty(&iaa_devices))
+ goto out;
+
+ mode = find_iaa_compression_mode(name, &idx);
+ if (mode) {
+ free_iaa_compression_mode(mode);
+ iaa_compression_modes[idx] = NULL;
+ }
+out:
+ mutex_unlock(&iaa_devices_lock);
+}
+EXPORT_SYMBOL_GPL(remove_iaa_compression_mode);
+
+/**
+ * add_iaa_compression_mode - Add an IAA compression mode
+ * @name: The name the compression mode will be known as
+ * @ll_table: The ll table
+ * @ll_table_size: The ll table size in bytes
+ * @d_table: The d table
+ * @d_table_size: The d table size in bytes
+ * @header_table: Optional header table
+ * @header_table_size: Optional header table size in bytes
+ * @gen_decomp_table_flags: Optional flags used to generate the decomp table
+ * @init: Optional callback function to init the compression mode data
+ * @free: Optional callback function to free the compression mode data
+ *
+ * Add a new IAA compression mode named @name.
+ *
+ * Returns 0 if successful, errcode otherwise.
+ */ +int add_iaa_compression_mode(const char *name, + const u32 *ll_table, + int ll_table_size, + const u32 *d_table, + int d_table_size, + const u8 *header_table, + int header_table_size, + u16 gen_decomp_table_flags, + iaa_dev_comp_init_fn_t init, + iaa_dev_comp_free_fn_t free) +{ + struct iaa_compression_mode *mode; + int idx, ret = -ENOMEM; + + mutex_lock(&iaa_devices_lock); + + if (!list_empty(&iaa_devices)) { + ret = -EBUSY; + goto out; + } + + mode = kzalloc(sizeof(*mode), GFP_KERNEL); + if (!mode) + goto out; + + mode->name = kstrdup(name, GFP_KERNEL); + if (!mode->name) + goto free; + + if (ll_table) { + mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL); + if (!mode->ll_table) + goto free; + memcpy(mode->ll_table, ll_table, ll_table_size); + mode->ll_table_size = ll_table_size; + } + + if (d_table) { + mode->d_table = kzalloc(d_table_size, GFP_KERNEL); + if (!mode->d_table) + goto free; + memcpy(mode->d_table, d_table, d_table_size); + mode->d_table_size = d_table_size; + } + + if (header_table) { + mode->header_table = kzalloc(header_table_size, GFP_KERNEL); + if (!mode->header_table) + goto free; + memcpy(mode->header_table, header_table, header_table_size); + mode->header_table_size = header_table_size; + } + + mode->gen_decomp_table_flags = gen_decomp_table_flags; + + mode->init = init; + mode->free = free; + + idx = find_empty_iaa_compression_mode(); + if (idx < 0) + goto free; + + pr_debug("IAA compression mode %s added at idx %d\n", + mode->name, idx); + + iaa_compression_modes[idx] = mode; + + ret = 0; +out: + mutex_unlock(&iaa_devices_lock); + + return ret; +free: + free_iaa_compression_mode(mode); + goto out; +} +EXPORT_SYMBOL_GPL(add_iaa_compression_mode); + +static struct iaa_device_compression_mode * +get_iaa_device_compression_mode(struct iaa_device *iaa_device, int idx) +{ + return iaa_device->compression_modes[idx]; +} + +static void free_device_compression_mode(struct iaa_device *iaa_device, + struct iaa_device_compression_mode *device_mode) +{ + size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN; + struct device *dev = &iaa_device->idxd->pdev->dev; + + kfree(device_mode->name); + + if (device_mode->aecs_comp_table) + dma_free_coherent(dev, size, device_mode->aecs_comp_table, + device_mode->aecs_comp_table_dma_addr); + if (device_mode->aecs_decomp_table) + dma_free_coherent(dev, size, device_mode->aecs_decomp_table, + device_mode->aecs_decomp_table_dma_addr); + + kfree(device_mode); +} + +#define IDXD_OP_FLAG_AECS_RW_TGLS 0x400000 +#define IAX_AECS_DEFAULT_FLAG (IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC) +#define IAX_AECS_COMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS) +#define IAX_AECS_DECOMPRESS_FLAG (IAX_AECS_DEFAULT_FLAG | IDXD_OP_FLAG_RD_SRC2_AECS) +#define IAX_AECS_GEN_FLAG (IAX_AECS_DEFAULT_FLAG | \ + IDXD_OP_FLAG_WR_SRC2_AECS_COMP | \ + IDXD_OP_FLAG_AECS_RW_TGLS) + +static int check_completion(struct device *dev, + struct iax_completion_record *comp, + bool compress, + bool only_once); + +static int decompress_header(struct iaa_device_compression_mode *device_mode, + struct iaa_compression_mode *mode, + struct idxd_wq *wq) +{ + dma_addr_t src_addr, src2_addr; + struct idxd_desc *idxd_desc; + struct iax_hw_desc *desc; + struct device *dev; + int ret = 0; + + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(idxd_desc)) + return PTR_ERR(idxd_desc); + + desc = idxd_desc->iax_hw; + + dev = &wq->idxd->pdev->dev; + + src_addr = dma_map_single(dev, (void *)mode->header_table, + 
mode->header_table_size, DMA_TO_DEVICE); + dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n", + __func__, mode->name, src_addr, dev, + mode->header_table, mode->header_table_size); + if (unlikely(dma_mapping_error(dev, src_addr))) { + dev_dbg(dev, "dma_map_single err, exiting\n"); + ret = -ENOMEM; + return ret; + } + + desc->flags = IAX_AECS_GEN_FLAG; + desc->opcode = IAX_OPCODE_DECOMPRESS; + + desc->src1_addr = (u64)src_addr; + desc->src1_size = mode->header_table_size; + + src2_addr = device_mode->aecs_decomp_table_dma_addr; + desc->src2_addr = (u64)src2_addr; + desc->src2_size = 1088; + dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n", + __func__, mode->name, desc->src2_addr, dev, desc->src2_size); + desc->max_dst_size = 0; // suppressed output + + desc->decompr_flags = mode->gen_decomp_table_flags; + + desc->priv = 0; + + desc->completion_addr = idxd_desc->compl_dma; + + ret = idxd_submit_desc(wq, idxd_desc); + if (ret) { + pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret); + goto out; + } + + ret = check_completion(dev, idxd_desc->iax_completion, false, false); + if (ret) + dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n", + __func__, mode->name, ret); + else + dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__, + mode->name); +out: + dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE); + + return ret; +} + +static int init_device_compression_mode(struct iaa_device *iaa_device, + struct iaa_compression_mode *mode, + int idx, struct idxd_wq *wq) +{ + size_t size = sizeof(struct aecs_comp_table_record) + IAA_AECS_ALIGN; + struct device *dev = &iaa_device->idxd->pdev->dev; + struct iaa_device_compression_mode *device_mode; + int ret = -ENOMEM; + + device_mode = kzalloc(sizeof(*device_mode), GFP_KERNEL); + if (!device_mode) + return -ENOMEM; + + device_mode->name = kstrdup(mode->name, GFP_KERNEL); + if (!device_mode->name) + goto free; + + device_mode->aecs_comp_table = dma_alloc_coherent(dev, size, + &device_mode->aecs_comp_table_dma_addr, GFP_KERNEL); + if (!device_mode->aecs_comp_table) + goto free; + + device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size, + &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL); + if (!device_mode->aecs_decomp_table) + goto free; + + /* Add Huffman table to aecs */ + memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table)); + memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size); + memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size); + + if (mode->header_table) { + ret = decompress_header(device_mode, mode, wq); + if (ret) { + pr_debug("iaa header decompression failed: ret=%d\n", ret); + goto free; + } + } + + if (mode->init) { + ret = mode->init(device_mode); + if (ret) + goto free; + } + + /* mode index should match iaa_compression_modes idx */ + iaa_device->compression_modes[idx] = device_mode; + + pr_debug("IAA %s compression mode initialized for iaa device %d\n", + mode->name, iaa_device->idxd->id); + + ret = 0; +out: + return ret; +free: + pr_debug("IAA %s compression mode initialization failed for iaa device %d\n", + mode->name, iaa_device->idxd->id); + + free_device_compression_mode(iaa_device, device_mode); + goto out; +} + +static int init_device_compression_modes(struct iaa_device *iaa_device, + struct idxd_wq *wq) +{ + struct iaa_compression_mode *mode; + int i, ret = 0; + + for (i = 0; i < IAA_COMP_MODES_MAX; i++) { + mode = iaa_compression_modes[i]; + if (!mode) + continue; + 
+ ret = init_device_compression_mode(iaa_device, mode, i, wq); + if (ret) + break; + } + + return ret; +} + +static void remove_device_compression_modes(struct iaa_device *iaa_device) +{ + struct iaa_device_compression_mode *device_mode; + int i; + + for (i = 0; i < IAA_COMP_MODES_MAX; i++) { + device_mode = iaa_device->compression_modes[i]; + if (!device_mode) + continue; + + free_device_compression_mode(iaa_device, device_mode); + iaa_device->compression_modes[i] = NULL; + if (iaa_compression_modes[i]->free) + iaa_compression_modes[i]->free(device_mode); + } +} + +static struct iaa_device *iaa_device_alloc(void) +{ + struct iaa_device *iaa_device; + + iaa_device = kzalloc(sizeof(*iaa_device), GFP_KERNEL); + if (!iaa_device) + return NULL; + + INIT_LIST_HEAD(&iaa_device->wqs); + + return iaa_device; +} + +static bool iaa_has_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) +{ + struct iaa_wq *iaa_wq; + + list_for_each_entry(iaa_wq, &iaa_device->wqs, list) { + if (iaa_wq->wq == wq) + return true; + } + + return false; +} + +static struct iaa_device *add_iaa_device(struct idxd_device *idxd) +{ + struct iaa_device *iaa_device; + + iaa_device = iaa_device_alloc(); + if (!iaa_device) + return NULL; + + iaa_device->idxd = idxd; + + list_add_tail(&iaa_device->list, &iaa_devices); + + nr_iaa++; + + return iaa_device; +} + +static int init_iaa_device(struct iaa_device *iaa_device, struct iaa_wq *iaa_wq) +{ + int ret = 0; + + ret = init_device_compression_modes(iaa_device, iaa_wq->wq); + if (ret) + return ret; + + return ret; +} + +static void del_iaa_device(struct iaa_device *iaa_device) +{ + list_del(&iaa_device->list); + + nr_iaa--; +} + +static int add_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq, + struct iaa_wq **new_wq) +{ + struct idxd_device *idxd = iaa_device->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct iaa_wq *iaa_wq; + + iaa_wq = kzalloc(sizeof(*iaa_wq), GFP_KERNEL); + if (!iaa_wq) + return -ENOMEM; + + iaa_wq->wq = wq; + iaa_wq->iaa_device = iaa_device; + idxd_wq_set_private(wq, iaa_wq); + + list_add_tail(&iaa_wq->list, &iaa_device->wqs); + + iaa_device->n_wq++; + + if (new_wq) + *new_wq = iaa_wq; + + dev_dbg(dev, "added wq %d to iaa device %d, n_wq %d\n", + wq->id, iaa_device->idxd->id, iaa_device->n_wq); + + return 0; +} + +static void del_iaa_wq(struct iaa_device *iaa_device, struct idxd_wq *wq) +{ + struct idxd_device *idxd = iaa_device->idxd; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct iaa_wq *iaa_wq; + + list_for_each_entry(iaa_wq, &iaa_device->wqs, list) { + if (iaa_wq->wq == wq) { + list_del(&iaa_wq->list); + iaa_device->n_wq--; + + dev_dbg(dev, "removed wq %d from iaa_device %d, n_wq %d, nr_iaa %d\n", + wq->id, iaa_device->idxd->id, + iaa_device->n_wq, nr_iaa); + + if (iaa_device->n_wq == 0) + del_iaa_device(iaa_device); + break; + } + } +} + +static void clear_wq_table(void) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpus; cpu++) + wq_table_clear_entry(cpu); + + pr_debug("cleared wq table\n"); +} + +static void free_iaa_device(struct iaa_device *iaa_device) +{ + if (!iaa_device) + return; + + remove_device_compression_modes(iaa_device); + kfree(iaa_device); +} + +static void __free_iaa_wq(struct iaa_wq *iaa_wq) +{ + struct iaa_device *iaa_device; + + if (!iaa_wq) + return; + + iaa_device = iaa_wq->iaa_device; + if (iaa_device->n_wq == 0) + free_iaa_device(iaa_wq->iaa_device); +} + +static void free_iaa_wq(struct iaa_wq *iaa_wq) +{ + struct idxd_wq *wq; + + __free_iaa_wq(iaa_wq); + + 
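+ /* save the wq pointer: iaa_wq is freed before the private data is cleared */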
wq = iaa_wq->wq; + + kfree(iaa_wq); + idxd_wq_set_private(wq, NULL); +} + +static int iaa_wq_get(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct iaa_wq *iaa_wq; + int ret = 0; + + spin_lock(&idxd->dev_lock); + iaa_wq = idxd_wq_get_private(wq); + if (iaa_wq && !iaa_wq->remove) { + iaa_wq->ref++; + idxd_wq_get(wq); + } else { + ret = -ENODEV; + } + spin_unlock(&idxd->dev_lock); + + return ret; +} + +static int iaa_wq_put(struct idxd_wq *wq) +{ + struct idxd_device *idxd = wq->idxd; + struct iaa_wq *iaa_wq; + bool free = false; + int ret = 0; + + spin_lock(&idxd->dev_lock); + iaa_wq = idxd_wq_get_private(wq); + if (iaa_wq) { + iaa_wq->ref--; + if (iaa_wq->ref == 0 && iaa_wq->remove) { + idxd_wq_set_private(wq, NULL); + free = true; + } + idxd_wq_put(wq); + } else { + ret = -ENODEV; + } + spin_unlock(&idxd->dev_lock); + if (free) { + __free_iaa_wq(iaa_wq); + kfree(iaa_wq); + } + + return ret; +} + +static void free_wq_table(void) +{ + int cpu; + + for (cpu = 0; cpu < nr_cpus; cpu++) + wq_table_free_entry(cpu); + + free_percpu(wq_table); + + pr_debug("freed wq table\n"); +} + +static int alloc_wq_table(int max_wqs) +{ + struct wq_table_entry *entry; + int cpu; + + wq_table = alloc_percpu(struct wq_table_entry); + if (!wq_table) + return -ENOMEM; + + for (cpu = 0; cpu < nr_cpus; cpu++) { + entry = per_cpu_ptr(wq_table, cpu); + entry->wqs = kcalloc(max_wqs, sizeof(struct wq *), GFP_KERNEL); + if (!entry->wqs) { + free_wq_table(); + return -ENOMEM; + } + + entry->max_wqs = max_wqs; + } + + pr_debug("initialized wq table\n"); + + return 0; +} + +static int save_iaa_wq(struct idxd_wq *wq) +{ + struct iaa_device *iaa_device, *found = NULL; + struct idxd_device *idxd; + struct pci_dev *pdev; + struct device *dev; + int ret = 0; + + list_for_each_entry(iaa_device, &iaa_devices, list) { + if (iaa_device->idxd == wq->idxd) { + idxd = iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + /* + * Check to see that we don't already have this wq. + * Shouldn't happen but we don't control probing. 
+ */ + if (iaa_has_wq(iaa_device, wq)) { + dev_dbg(dev, "same wq probed multiple times for iaa_device %p\n", + iaa_device); + goto out; + } + + found = iaa_device; + + ret = add_iaa_wq(iaa_device, wq, NULL); + if (ret) + goto out; + + break; + } + } + + if (!found) { + struct iaa_device *new_device; + struct iaa_wq *new_wq; + + new_device = add_iaa_device(wq->idxd); + if (!new_device) { + ret = -ENOMEM; + goto out; + } + + ret = add_iaa_wq(new_device, wq, &new_wq); + if (ret) { + del_iaa_device(new_device); + free_iaa_device(new_device); + goto out; + } + + ret = init_iaa_device(new_device, new_wq); + if (ret) { + del_iaa_wq(new_device, new_wq->wq); + del_iaa_device(new_device); + free_iaa_wq(new_wq); + goto out; + } + } + + if (WARN_ON(nr_iaa == 0)) + return -EINVAL; + + cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; + if (!cpus_per_iaa) + cpus_per_iaa = 1; +out: + return 0; +} + +static void remove_iaa_wq(struct idxd_wq *wq) +{ + struct iaa_device *iaa_device; + + list_for_each_entry(iaa_device, &iaa_devices, list) { + if (iaa_has_wq(iaa_device, wq)) { + del_iaa_wq(iaa_device, wq); + break; + } + } + + if (nr_iaa) { + cpus_per_iaa = (nr_nodes * nr_cpus_per_node) / nr_iaa; + if (!cpus_per_iaa) + cpus_per_iaa = 1; + } else + cpus_per_iaa = 1; +} + +static int wq_table_add_wqs(int iaa, int cpu) +{ + struct iaa_device *iaa_device, *found_device = NULL; + int ret = 0, cur_iaa = 0, n_wqs_added = 0; + struct idxd_device *idxd; + struct iaa_wq *iaa_wq; + struct pci_dev *pdev; + struct device *dev; + + list_for_each_entry(iaa_device, &iaa_devices, list) { + idxd = iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + + if (cur_iaa != iaa) { + cur_iaa++; + continue; + } + + found_device = iaa_device; + dev_dbg(dev, "getting wq from iaa_device %d, cur_iaa %d\n", + found_device->idxd->id, cur_iaa); + break; + } + + if (!found_device) { + found_device = list_first_entry_or_null(&iaa_devices, + struct iaa_device, list); + if (!found_device) { + pr_debug("couldn't find any iaa devices with wqs!\n"); + ret = -EINVAL; + goto out; + } + cur_iaa = 0; + + idxd = found_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + dev_dbg(dev, "getting wq from only iaa_device %d, cur_iaa %d\n", + found_device->idxd->id, cur_iaa); + } + + list_for_each_entry(iaa_wq, &found_device->wqs, list) { + wq_table_add(cpu, iaa_wq->wq); + pr_debug("rebalance: added wq for cpu=%d: iaa wq %d.%d\n", + cpu, iaa_wq->wq->idxd->id, iaa_wq->wq->id); + n_wqs_added++; + } + + if (!n_wqs_added) { + pr_debug("couldn't find any iaa wqs!\n"); + ret = -EINVAL; + goto out; + } +out: + return ret; +} + +/* + * Rebalance the wq table so that given a cpu, it's easy to find the + * closest IAA instance. The idea is to try to choose the most + * appropriate IAA instance for a caller and spread available + * workqueues around to clients. 
+ */ +static void rebalance_wq_table(void) +{ + const struct cpumask *node_cpus; + int node, cpu, iaa = -1; + + if (nr_iaa == 0) + return; + + pr_debug("rebalance: nr_nodes=%d, nr_cpus %d, nr_iaa %d, cpus_per_iaa %d\n", + nr_nodes, nr_cpus, nr_iaa, cpus_per_iaa); + + clear_wq_table(); + + if (nr_iaa == 1) { + for (cpu = 0; cpu < nr_cpus; cpu++) { + if (WARN_ON(wq_table_add_wqs(0, cpu))) { + pr_debug("could not add any wqs for iaa 0 to cpu %d!\n", cpu); + return; + } + } + + return; + } + + for_each_node_with_cpus(node) { + node_cpus = cpumask_of_node(node); + + for (cpu = 0; cpu < nr_cpus_per_node; cpu++) { + int node_cpu = cpumask_nth(cpu, node_cpus); + + if (WARN_ON(node_cpu >= nr_cpu_ids)) { + pr_debug("node_cpu %d doesn't exist!\n", node_cpu); + return; + } + + if ((cpu % cpus_per_iaa) == 0) + iaa++; + + if (WARN_ON(wq_table_add_wqs(iaa, node_cpu))) { + pr_debug("could not add any wqs for iaa %d to cpu %d!\n", iaa, cpu); + return; + } + } + } +} + +static inline int check_completion(struct device *dev, + struct iax_completion_record *comp, + bool compress, + bool only_once) +{ + char *op_str = compress ? "compress" : "decompress"; + int ret = 0; + + while (!comp->status) { + if (only_once) + return -EAGAIN; + cpu_relax(); + } + + if (comp->status != IAX_COMP_SUCCESS) { + if (comp->status == IAA_ERROR_WATCHDOG_EXPIRED) { + ret = -ETIMEDOUT; + dev_dbg(dev, "%s timed out, size=0x%x\n", + op_str, comp->output_size); + update_completion_timeout_errs(); + goto out; + } + + if (comp->status == IAA_ANALYTICS_ERROR && + comp->error_code == IAA_ERROR_COMP_BUF_OVERFLOW && compress) { + ret = -E2BIG; + dev_dbg(dev, "compressed > uncompressed size," + " not compressing, size=0x%x\n", + comp->output_size); + update_completion_comp_buf_overflow_errs(); + goto out; + } + + if (comp->status == IAA_ERROR_DECOMP_BUF_OVERFLOW) { + ret = -EOVERFLOW; + goto out; + } + + ret = -EINVAL; + dev_dbg(dev, "iaa %s status=0x%x, error=0x%x, size=0x%x\n", + op_str, comp->status, comp->error_code, comp->output_size); + print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, 8, 1, comp, 64, 0); + update_completion_einval_errs(); + + goto out; + } +out: + return ret; +} + +static int deflate_generic_decompress(struct acomp_req *req) +{ + void *src, *dst; + int ret; + + src = kmap_local_page(sg_page(req->src)) + req->src->offset; + dst = kmap_local_page(sg_page(req->dst)) + req->dst->offset; + + ret = crypto_comp_decompress(deflate_generic_tfm, + src, req->slen, dst, &req->dlen); + + kunmap_local(src); + kunmap_local(dst); + + update_total_sw_decomp_calls(); + + return ret; +} + +static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, + struct acomp_req *req, + dma_addr_t *src_addr, dma_addr_t *dst_addr); + +static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, + struct idxd_wq *wq, + dma_addr_t src_addr, unsigned int slen, + dma_addr_t dst_addr, unsigned int *dlen, + u32 compression_crc); + +static void iaa_desc_complete(struct idxd_desc *idxd_desc, + enum idxd_complete_type comp_type, + bool free_desc, void *__ctx, + u32 *status) +{ + struct iaa_device_compression_mode *active_compression_mode; + struct iaa_compression_ctx *compression_ctx; + struct crypto_ctx *ctx = __ctx; + struct iaa_device *iaa_device; + struct idxd_device *idxd; + struct iaa_wq *iaa_wq; + struct pci_dev *pdev; + struct device *dev; + int ret, err = 0; + + compression_ctx = crypto_tfm_ctx(ctx->tfm); + + iaa_wq = idxd_wq_get_private(idxd_desc->wq); + iaa_device = iaa_wq->iaa_device; + idxd = 
iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + + active_compression_mode = get_iaa_device_compression_mode(iaa_device, + compression_ctx->mode); + dev_dbg(dev, "%s: compression mode %s," + " ctx->src_addr %llx, ctx->dst_addr %llx\n", __func__, + active_compression_mode->name, + ctx->src_addr, ctx->dst_addr); + + ret = check_completion(dev, idxd_desc->iax_completion, + ctx->compress, false); + if (ret) { + dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret); + if (!ctx->compress && + idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) { + pr_warn("%s: falling back to deflate-generic decompress, " + "analytics error code %x\n", __func__, + idxd_desc->iax_completion->error_code); + ret = deflate_generic_decompress(ctx->req); + if (ret) { + dev_dbg(dev, "%s: deflate-generic failed ret=%d\n", + __func__, ret); + err = -EIO; + goto err; + } + } else { + err = -EIO; + goto err; + } + } else { + ctx->req->dlen = idxd_desc->iax_completion->output_size; + } + + /* Update stats */ + if (ctx->compress) { + update_total_comp_bytes_out(ctx->req->dlen); + update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen); + } else { + update_total_decomp_bytes_in(ctx->req->dlen); + update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen); + } + + if (ctx->compress && compression_ctx->verify_compress) { + dma_addr_t src_addr, dst_addr; + u32 compression_crc; + + compression_crc = idxd_desc->iax_completion->crc; + + ret = iaa_remap_for_verify(dev, iaa_wq, ctx->req, &src_addr, &dst_addr); + if (ret) { + dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret); + err = -EIO; + goto out; + } + + ret = iaa_compress_verify(ctx->tfm, ctx->req, iaa_wq->wq, src_addr, + ctx->req->slen, dst_addr, &ctx->req->dlen, + compression_crc); + if (ret) { + dev_dbg(dev, "%s: compress verify failed ret=%d\n", __func__, ret); + err = -EIO; + } + + dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_TO_DEVICE); + dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_FROM_DEVICE); + + goto out; + } +err: + dma_unmap_sg(dev, ctx->req->dst, sg_nents(ctx->req->dst), DMA_FROM_DEVICE); + dma_unmap_sg(dev, ctx->req->src, sg_nents(ctx->req->src), DMA_TO_DEVICE); +out: + if (ret != 0) + dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret); + + if (ctx->req->base.complete) + acomp_request_complete(ctx->req, err); + + if (free_desc) + idxd_free_desc(idxd_desc->wq, idxd_desc); + iaa_wq_put(idxd_desc->wq); +} + +static int iaa_compress(struct crypto_tfm *tfm, struct acomp_req *req, + struct idxd_wq *wq, + dma_addr_t src_addr, unsigned int slen, + dma_addr_t dst_addr, unsigned int *dlen, + u32 *compression_crc, + bool disable_async) +{ + struct iaa_device_compression_mode *active_compression_mode; + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + struct iaa_device *iaa_device; + struct idxd_desc *idxd_desc; + struct iax_hw_desc *desc; + struct idxd_device *idxd; + struct iaa_wq *iaa_wq; + struct pci_dev *pdev; + struct device *dev; + int ret = 0; + + iaa_wq = idxd_wq_get_private(wq); + iaa_device = iaa_wq->iaa_device; + idxd = iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + + active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); + + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(idxd_desc)) { + dev_dbg(dev, "idxd descriptor allocation failed\n"); + dev_dbg(dev, "iaa compress failed: ret=%ld\n", PTR_ERR(idxd_desc)); + return PTR_ERR(idxd_desc); + } + desc = idxd_desc->iax_hw; + + desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR 
| + IDXD_OP_FLAG_RD_SRC2_AECS | IDXD_OP_FLAG_CC; + desc->opcode = IAX_OPCODE_COMPRESS; + desc->compr_flags = IAA_COMP_FLAGS; + desc->priv = 0; + + desc->src1_addr = (u64)src_addr; + desc->src1_size = slen; + desc->dst_addr = (u64)dst_addr; + desc->max_dst_size = *dlen; + desc->src2_addr = active_compression_mode->aecs_comp_table_dma_addr; + desc->src2_size = sizeof(struct aecs_comp_table_record); + desc->completion_addr = idxd_desc->compl_dma; + + if (ctx->use_irq && !disable_async) { + desc->flags |= IDXD_OP_FLAG_RCI; + + idxd_desc->crypto.req = req; + idxd_desc->crypto.tfm = tfm; + idxd_desc->crypto.src_addr = src_addr; + idxd_desc->crypto.dst_addr = dst_addr; + idxd_desc->crypto.compress = true; + + dev_dbg(dev, "%s use_async_irq: compression mode %s," + " src_addr %llx, dst_addr %llx\n", __func__, + active_compression_mode->name, + src_addr, dst_addr); + } else if (ctx->async_mode && !disable_async) + req->base.data = idxd_desc; + + dev_dbg(dev, "%s: compression mode %s," + " desc->src1_addr %llx, desc->src1_size %d," + " desc->dst_addr %llx, desc->max_dst_size %d," + " desc->src2_addr %llx, desc->src2_size %d\n", __func__, + active_compression_mode->name, + desc->src1_addr, desc->src1_size, desc->dst_addr, + desc->max_dst_size, desc->src2_addr, desc->src2_size); + + ret = idxd_submit_desc(wq, idxd_desc); + if (ret) { + dev_dbg(dev, "submit_desc failed ret=%d\n", ret); + goto err; + } + + /* Update stats */ + update_total_comp_calls(); + update_wq_comp_calls(wq); + + if (ctx->async_mode && !disable_async) { + ret = -EINPROGRESS; + dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__); + goto out; + } + + ret = check_completion(dev, idxd_desc->iax_completion, true, false); + if (ret) { + dev_dbg(dev, "check_completion failed ret=%d\n", ret); + goto err; + } + + *dlen = idxd_desc->iax_completion->output_size; + + /* Update stats */ + update_total_comp_bytes_out(*dlen); + update_wq_comp_bytes(wq, *dlen); + + *compression_crc = idxd_desc->iax_completion->crc; + + if (!ctx->async_mode || disable_async) + idxd_free_desc(wq, idxd_desc); +out: + return ret; +err: + idxd_free_desc(wq, idxd_desc); + dev_dbg(dev, "iaa compress failed: ret=%d\n", ret); + + goto out; +} + +static int iaa_remap_for_verify(struct device *dev, struct iaa_wq *iaa_wq, + struct acomp_req *req, + dma_addr_t *src_addr, dma_addr_t *dst_addr) +{ + int ret = 0; + int nr_sgs; + + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); + + nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "verify: couldn't map src sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + goto out; + } + *src_addr = sg_dma_address(req->src); + dev_dbg(dev, "verify: dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," + " req->slen %d, sg_dma_len(sg) %d\n", *src_addr, nr_sgs, + req->src, req->slen, sg_dma_len(req->src)); + + nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "verify: couldn't map dst sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); + goto out; + } + *dst_addr = sg_dma_address(req->dst); + dev_dbg(dev, "verify: dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," + " req->dlen %d, sg_dma_len(sg) %d\n", *dst_addr, nr_sgs, + 
req->dst, req->dlen, sg_dma_len(req->dst)); +out: + return ret; +} + +static int iaa_compress_verify(struct crypto_tfm *tfm, struct acomp_req *req, + struct idxd_wq *wq, + dma_addr_t src_addr, unsigned int slen, + dma_addr_t dst_addr, unsigned int *dlen, + u32 compression_crc) +{ + struct iaa_device_compression_mode *active_compression_mode; + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + struct iaa_device *iaa_device; + struct idxd_desc *idxd_desc; + struct iax_hw_desc *desc; + struct idxd_device *idxd; + struct iaa_wq *iaa_wq; + struct pci_dev *pdev; + struct device *dev; + int ret = 0; + + iaa_wq = idxd_wq_get_private(wq); + iaa_device = iaa_wq->iaa_device; + idxd = iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + + active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); + + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(idxd_desc)) { + dev_dbg(dev, "idxd descriptor allocation failed\n"); + dev_dbg(dev, "iaa compress failed: ret=%ld\n", + PTR_ERR(idxd_desc)); + return PTR_ERR(idxd_desc); + } + desc = idxd_desc->iax_hw; + + /* Verify (optional) - decompress and check crc, suppress dest write */ + + desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC; + desc->opcode = IAX_OPCODE_DECOMPRESS; + desc->decompr_flags = IAA_DECOMP_FLAGS | IAA_DECOMP_SUPPRESS_OUTPUT; + desc->priv = 0; + + desc->src1_addr = (u64)dst_addr; + desc->src1_size = *dlen; + desc->dst_addr = (u64)src_addr; + desc->max_dst_size = slen; + desc->completion_addr = idxd_desc->compl_dma; + + dev_dbg(dev, "(verify) compression mode %s," + " desc->src1_addr %llx, desc->src1_size %d," + " desc->dst_addr %llx, desc->max_dst_size %d," + " desc->src2_addr %llx, desc->src2_size %d\n", + active_compression_mode->name, + desc->src1_addr, desc->src1_size, desc->dst_addr, + desc->max_dst_size, desc->src2_addr, desc->src2_size); + + ret = idxd_submit_desc(wq, idxd_desc); + if (ret) { + dev_dbg(dev, "submit_desc (verify) failed ret=%d\n", ret); + goto err; + } + + ret = check_completion(dev, idxd_desc->iax_completion, false, false); + if (ret) { + dev_dbg(dev, "(verify) check_completion failed ret=%d\n", ret); + goto err; + } + + if (compression_crc != idxd_desc->iax_completion->crc) { + ret = -EINVAL; + dev_dbg(dev, "(verify) iaa comp/decomp crc mismatch:" + " comp=0x%x, decomp=0x%x\n", compression_crc, + idxd_desc->iax_completion->crc); + print_hex_dump(KERN_INFO, "cmp-rec: ", DUMP_PREFIX_OFFSET, + 8, 1, idxd_desc->iax_completion, 64, 0); + goto err; + } + + idxd_free_desc(wq, idxd_desc); +out: + return ret; +err: + idxd_free_desc(wq, idxd_desc); + dev_dbg(dev, "iaa compress failed: ret=%d\n", ret); + + goto out; +} + +static int iaa_decompress(struct crypto_tfm *tfm, struct acomp_req *req, + struct idxd_wq *wq, + dma_addr_t src_addr, unsigned int slen, + dma_addr_t dst_addr, unsigned int *dlen, + bool disable_async) +{ + struct iaa_device_compression_mode *active_compression_mode; + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + struct iaa_device *iaa_device; + struct idxd_desc *idxd_desc; + struct iax_hw_desc *desc; + struct idxd_device *idxd; + struct iaa_wq *iaa_wq; + struct pci_dev *pdev; + struct device *dev; + int ret = 0; + + iaa_wq = idxd_wq_get_private(wq); + iaa_device = iaa_wq->iaa_device; + idxd = iaa_device->idxd; + pdev = idxd->pdev; + dev = &pdev->dev; + + active_compression_mode = get_iaa_device_compression_mode(iaa_device, ctx->mode); + + idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); + if (IS_ERR(idxd_desc)) { + dev_dbg(dev, 
"idxd descriptor allocation failed\n"); + dev_dbg(dev, "iaa decompress failed: ret=%ld\n", + PTR_ERR(idxd_desc)); + return PTR_ERR(idxd_desc); + } + desc = idxd_desc->iax_hw; + + desc->flags = IDXD_OP_FLAG_CRAV | IDXD_OP_FLAG_RCR | IDXD_OP_FLAG_CC; + desc->opcode = IAX_OPCODE_DECOMPRESS; + desc->max_dst_size = PAGE_SIZE; + desc->decompr_flags = IAA_DECOMP_FLAGS; + desc->priv = 0; + + desc->src1_addr = (u64)src_addr; + desc->dst_addr = (u64)dst_addr; + desc->max_dst_size = *dlen; + desc->src1_size = slen; + desc->completion_addr = idxd_desc->compl_dma; + + if (ctx->use_irq && !disable_async) { + desc->flags |= IDXD_OP_FLAG_RCI; + + idxd_desc->crypto.req = req; + idxd_desc->crypto.tfm = tfm; + idxd_desc->crypto.src_addr = src_addr; + idxd_desc->crypto.dst_addr = dst_addr; + idxd_desc->crypto.compress = false; + + dev_dbg(dev, "%s: use_async_irq compression mode %s," + " src_addr %llx, dst_addr %llx\n", __func__, + active_compression_mode->name, + src_addr, dst_addr); + } else if (ctx->async_mode && !disable_async) + req->base.data = idxd_desc; + + dev_dbg(dev, "%s: decompression mode %s," + " desc->src1_addr %llx, desc->src1_size %d," + " desc->dst_addr %llx, desc->max_dst_size %d," + " desc->src2_addr %llx, desc->src2_size %d\n", __func__, + active_compression_mode->name, + desc->src1_addr, desc->src1_size, desc->dst_addr, + desc->max_dst_size, desc->src2_addr, desc->src2_size); + + ret = idxd_submit_desc(wq, idxd_desc); + if (ret) { + dev_dbg(dev, "submit_desc failed ret=%d\n", ret); + goto err; + } + + /* Update stats */ + update_total_decomp_calls(); + update_wq_decomp_calls(wq); + + if (ctx->async_mode && !disable_async) { + ret = -EINPROGRESS; + dev_dbg(dev, "%s: returning -EINPROGRESS\n", __func__); + goto out; + } + + ret = check_completion(dev, idxd_desc->iax_completion, false, false); + if (ret) { + dev_dbg(dev, "%s: check_completion failed ret=%d\n", __func__, ret); + if (idxd_desc->iax_completion->status == IAA_ANALYTICS_ERROR) { + pr_warn("%s: falling back to deflate-generic decompress, " + "analytics error code %x\n", __func__, + idxd_desc->iax_completion->error_code); + ret = deflate_generic_decompress(req); + if (ret) { + dev_dbg(dev, "%s: deflate-generic failed ret=%d\n", + __func__, ret); + goto err; + } + } else { + goto err; + } + } else { + req->dlen = idxd_desc->iax_completion->output_size; + } + + *dlen = req->dlen; + + if (!ctx->async_mode || disable_async) + idxd_free_desc(wq, idxd_desc); + + /* Update stats */ + update_total_decomp_bytes_in(slen); + update_wq_decomp_bytes(wq, slen); +out: + return ret; +err: + idxd_free_desc(wq, idxd_desc); + dev_dbg(dev, "iaa decompress failed: ret=%d\n", ret); + + goto out; +} + +static int iaa_comp_acompress(struct acomp_req *req) +{ + struct iaa_compression_ctx *compression_ctx; + struct crypto_tfm *tfm = req->base.tfm; + dma_addr_t src_addr, dst_addr; + bool disable_async = false; + int nr_sgs, cpu, ret = 0; + struct iaa_wq *iaa_wq; + u32 compression_crc; + struct idxd_wq *wq; + struct device *dev; + int order = -1; + + compression_ctx = crypto_tfm_ctx(tfm); + + if (!iaa_crypto_enabled) { + pr_debug("iaa_crypto disabled, not compressing\n"); + return -ENODEV; + } + + if (!req->src || !req->slen) { + pr_debug("invalid src, not compressing\n"); + return -EINVAL; + } + + cpu = get_cpu(); + wq = wq_table_next_wq(cpu); + put_cpu(); + if (!wq) { + pr_debug("no wq configured for cpu=%d\n", cpu); + return -ENODEV; + } + + ret = iaa_wq_get(wq); + if (ret) { + pr_debug("no wq available for cpu=%d\n", cpu); + return -ENODEV; + } + + 
iaa_wq = idxd_wq_get_private(wq); + + if (!req->dst) { + gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + + /* incompressible data will always be < 2 * slen */ + req->dlen = 2 * req->slen; + order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE); + req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL); + if (!req->dst) { + ret = -ENOMEM; + order = -1; + goto out; + } + disable_async = true; + } + + dev = &wq->idxd->pdev->dev; + + nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "couldn't map src sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + goto out; + } + src_addr = sg_dma_address(req->src); + dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," + " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs, + req->src, req->slen, sg_dma_len(req->src)); + + nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "couldn't map dst sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + goto err_map_dst; + } + dst_addr = sg_dma_address(req->dst); + dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," + " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, + req->dst, req->dlen, sg_dma_len(req->dst)); + + ret = iaa_compress(tfm, req, wq, src_addr, req->slen, dst_addr, + &req->dlen, &compression_crc, disable_async); + if (ret == -EINPROGRESS) + return ret; + + if (!ret && compression_ctx->verify_compress) { + ret = iaa_remap_for_verify(dev, iaa_wq, req, &src_addr, &dst_addr); + if (ret) { + dev_dbg(dev, "%s: compress verify remap failed ret=%d\n", __func__, ret); + goto out; + } + + ret = iaa_compress_verify(tfm, req, wq, src_addr, req->slen, + dst_addr, &req->dlen, compression_crc); + if (ret) + dev_dbg(dev, "asynchronous compress verification failed ret=%d\n", ret); + + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_TO_DEVICE); + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_FROM_DEVICE); + + goto out; + } + + if (ret) + dev_dbg(dev, "asynchronous compress failed ret=%d\n", ret); + + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); +err_map_dst: + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); +out: + iaa_wq_put(wq); + + if (order >= 0) + sgl_free_order(req->dst, order); + + return ret; +} + +static int iaa_comp_adecompress_alloc_dest(struct acomp_req *req) +{ + gfp_t flags = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? 
+ GFP_KERNEL : GFP_ATOMIC; + struct crypto_tfm *tfm = req->base.tfm; + dma_addr_t src_addr, dst_addr; + int nr_sgs, cpu, ret = 0; + struct iaa_wq *iaa_wq; + struct device *dev; + struct idxd_wq *wq; + int order = -1; + + cpu = get_cpu(); + wq = wq_table_next_wq(cpu); + put_cpu(); + if (!wq) { + pr_debug("no wq configured for cpu=%d\n", cpu); + return -ENODEV; + } + + ret = iaa_wq_get(wq); + if (ret) { + pr_debug("no wq available for cpu=%d\n", cpu); + return -ENODEV; + } + + iaa_wq = idxd_wq_get_private(wq); + + dev = &wq->idxd->pdev->dev; + + nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "couldn't map src sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + goto out; + } + src_addr = sg_dma_address(req->src); + dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," + " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs, + req->src, req->slen, sg_dma_len(req->src)); + + req->dlen = 4 * req->slen; /* start with ~avg comp rato */ +alloc_dest: + order = order_base_2(round_up(req->dlen, PAGE_SIZE) / PAGE_SIZE); + req->dst = sgl_alloc_order(req->dlen, order, false, flags, NULL); + if (!req->dst) { + ret = -ENOMEM; + order = -1; + goto out; + } + + nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "couldn't map dst sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + goto err_map_dst; + } + + dst_addr = sg_dma_address(req->dst); + dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," + " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, + req->dst, req->dlen, sg_dma_len(req->dst)); + ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, + dst_addr, &req->dlen, true); + if (ret == -EOVERFLOW) { + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + req->dlen *= 2; + if (req->dlen > CRYPTO_ACOMP_DST_MAX) + goto err_map_dst; + goto alloc_dest; + } + + if (ret != 0) + dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret); + + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); +err_map_dst: + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); +out: + iaa_wq_put(wq); + + if (order >= 0) + sgl_free_order(req->dst, order); + + return ret; +} + +static int iaa_comp_adecompress(struct acomp_req *req) +{ + struct crypto_tfm *tfm = req->base.tfm; + dma_addr_t src_addr, dst_addr; + int nr_sgs, cpu, ret = 0; + struct iaa_wq *iaa_wq; + struct device *dev; + struct idxd_wq *wq; + + if (!iaa_crypto_enabled) { + pr_debug("iaa_crypto disabled, not decompressing\n"); + return -ENODEV; + } + + if (!req->src || !req->slen) { + pr_debug("invalid src, not decompressing\n"); + return -EINVAL; + } + + if (!req->dst) + return iaa_comp_adecompress_alloc_dest(req); + + cpu = get_cpu(); + wq = wq_table_next_wq(cpu); + put_cpu(); + if (!wq) { + pr_debug("no wq configured for cpu=%d\n", cpu); + return -ENODEV; + } + + ret = iaa_wq_get(wq); + if (ret) { + pr_debug("no wq available for cpu=%d\n", cpu); + return -ENODEV; + } + + iaa_wq = idxd_wq_get_private(wq); + + dev = &wq->idxd->pdev->dev; + + nr_sgs = dma_map_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "couldn't map src sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + goto out; + } + src_addr = 
sg_dma_address(req->src); + dev_dbg(dev, "dma_map_sg, src_addr %llx, nr_sgs %d, req->src %p," + " req->slen %d, sg_dma_len(sg) %d\n", src_addr, nr_sgs, + req->src, req->slen, sg_dma_len(req->src)); + + nr_sgs = dma_map_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); + if (nr_sgs <= 0 || nr_sgs > 1) { + dev_dbg(dev, "couldn't map dst sg for iaa device %d," + " wq %d: ret=%d\n", iaa_wq->iaa_device->idxd->id, + iaa_wq->wq->id, ret); + ret = -EIO; + goto err_map_dst; + } + dst_addr = sg_dma_address(req->dst); + dev_dbg(dev, "dma_map_sg, dst_addr %llx, nr_sgs %d, req->dst %p," + " req->dlen %d, sg_dma_len(sg) %d\n", dst_addr, nr_sgs, + req->dst, req->dlen, sg_dma_len(req->dst)); + + ret = iaa_decompress(tfm, req, wq, src_addr, req->slen, + dst_addr, &req->dlen, false); + if (ret == -EINPROGRESS) + return ret; + + if (ret != 0) + dev_dbg(dev, "asynchronous decompress failed ret=%d\n", ret); + + dma_unmap_sg(dev, req->dst, sg_nents(req->dst), DMA_FROM_DEVICE); +err_map_dst: + dma_unmap_sg(dev, req->src, sg_nents(req->src), DMA_TO_DEVICE); +out: + iaa_wq_put(wq); + + return ret; +} + +static void compression_ctx_init(struct iaa_compression_ctx *ctx) +{ + ctx->verify_compress = iaa_verify_compress; + ctx->async_mode = async_mode; + ctx->use_irq = use_irq; +} + +static int iaa_comp_init_fixed(struct crypto_acomp *acomp_tfm) +{ + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); + struct iaa_compression_ctx *ctx = crypto_tfm_ctx(tfm); + + compression_ctx_init(ctx); + + ctx->mode = IAA_MODE_FIXED; + + return 0; +} + +static void dst_free(struct scatterlist *sgl) +{ + /* + * Called for req->dst = NULL cases but we free elsewhere + * using sgl_free_order(). + */ +} + +static struct acomp_alg iaa_acomp_fixed_deflate = { + .init = iaa_comp_init_fixed, + .compress = iaa_comp_acompress, + .decompress = iaa_comp_adecompress, + .dst_free = dst_free, + .base = { + .cra_name = "deflate", + .cra_driver_name = "deflate-iaa", + .cra_ctxsize = sizeof(struct iaa_compression_ctx), + .cra_module = THIS_MODULE, + .cra_priority = IAA_ALG_PRIORITY, + } +}; + +static int iaa_register_compression_device(void) +{ + int ret; + + ret = crypto_register_acomp(&iaa_acomp_fixed_deflate); + if (ret) { + pr_err("deflate algorithm acomp fixed registration failed (%d)\n", ret); + goto out; + } + + iaa_crypto_registered = true; +out: + return ret; +} + +static int iaa_unregister_compression_device(void) +{ + if (iaa_crypto_registered) + crypto_unregister_acomp(&iaa_acomp_fixed_deflate); + + return 0; +} + +static int iaa_crypto_probe(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + struct idxd_device *idxd = wq->idxd; + struct idxd_driver_data *data = idxd->data; + struct device *dev = &idxd_dev->conf_dev; + bool first_wq = false; + int ret = 0; + + if (idxd->state != IDXD_DEV_ENABLED) + return -ENXIO; + + if (data->type != IDXD_TYPE_IAX) + return -ENODEV; + + mutex_lock(&wq->wq_lock); + + if (idxd_wq_get_private(wq)) { + mutex_unlock(&wq->wq_lock); + return -EBUSY; + } + + if (!idxd_wq_driver_name_match(wq, dev)) { + dev_dbg(dev, "wq %d.%d driver_name match failed: wq driver_name %s, dev driver name %s\n", + idxd->id, wq->id, wq->driver_name, dev->driver->name); + idxd->cmd_status = IDXD_SCMD_WQ_NO_DRV_NAME; + ret = -ENODEV; + goto err; + } + + wq->type = IDXD_WQT_KERNEL; + + ret = idxd_drv_enable_wq(wq); + if (ret < 0) { + dev_dbg(dev, "enable wq %d.%d failed: %d\n", + idxd->id, wq->id, ret); + ret = -ENXIO; + goto err; + } + + mutex_lock(&iaa_devices_lock); + + if 
(list_empty(&iaa_devices)) { + ret = alloc_wq_table(wq->idxd->max_wqs); + if (ret) + goto err_alloc; + first_wq = true; + } + + ret = save_iaa_wq(wq); + if (ret) + goto err_save; + + rebalance_wq_table(); + + if (first_wq) { + iaa_crypto_enabled = true; + ret = iaa_register_compression_device(); + if (ret != 0) { + iaa_crypto_enabled = false; + dev_dbg(dev, "IAA compression device registration failed\n"); + goto err_register; + } + try_module_get(THIS_MODULE); + + pr_info("iaa_crypto now ENABLED\n"); + } + + mutex_unlock(&iaa_devices_lock); +out: + mutex_unlock(&wq->wq_lock); + + return ret; + +err_register: + remove_iaa_wq(wq); + free_iaa_wq(idxd_wq_get_private(wq)); +err_save: + if (first_wq) + free_wq_table(); +err_alloc: + mutex_unlock(&iaa_devices_lock); + idxd_drv_disable_wq(wq); +err: + wq->type = IDXD_WQT_NONE; + + goto out; +} + +static void iaa_crypto_remove(struct idxd_dev *idxd_dev) +{ + struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev); + struct idxd_device *idxd = wq->idxd; + struct iaa_wq *iaa_wq; + bool free = false; + + idxd_wq_quiesce(wq); + + mutex_lock(&wq->wq_lock); + mutex_lock(&iaa_devices_lock); + + remove_iaa_wq(wq); + + spin_lock(&idxd->dev_lock); + iaa_wq = idxd_wq_get_private(wq); + if (!iaa_wq) { + spin_unlock(&idxd->dev_lock); + pr_err("%s: no iaa_wq available to remove\n", __func__); + goto out; + } + + if (iaa_wq->ref) { + iaa_wq->remove = true; + } else { + wq = iaa_wq->wq; + idxd_wq_set_private(wq, NULL); + free = true; + } + spin_unlock(&idxd->dev_lock); + if (free) { + __free_iaa_wq(iaa_wq); + kfree(iaa_wq); + } + + idxd_drv_disable_wq(wq); + rebalance_wq_table(); + + if (nr_iaa == 0) { + iaa_crypto_enabled = false; + free_wq_table(); + module_put(THIS_MODULE); + + pr_info("iaa_crypto now DISABLED\n"); + } +out: + mutex_unlock(&iaa_devices_lock); + mutex_unlock(&wq->wq_lock); +} + +static enum idxd_dev_type dev_types[] = { + IDXD_DEV_WQ, + IDXD_DEV_NONE, +}; + +static struct idxd_device_driver iaa_crypto_driver = { + .probe = iaa_crypto_probe, + .remove = iaa_crypto_remove, + .name = IDXD_SUBDRIVER_NAME, + .type = dev_types, + .desc_complete = iaa_desc_complete, +}; + +static int __init iaa_crypto_init_module(void) +{ + int ret = 0; + int node; + + nr_cpus = num_online_cpus(); + for_each_node_with_cpus(node) + nr_nodes++; + if (!nr_nodes) { + pr_err("IAA couldn't find any nodes with cpus\n"); + return -ENODEV; + } + nr_cpus_per_node = nr_cpus / nr_nodes; + + if (crypto_has_comp("deflate-generic", 0, 0)) + deflate_generic_tfm = crypto_alloc_comp("deflate-generic", 0, 0); + + if (IS_ERR_OR_NULL(deflate_generic_tfm)) { + pr_err("IAA could not alloc %s tfm: errcode = %ld\n", + "deflate-generic", PTR_ERR(deflate_generic_tfm)); + return -ENOMEM; + } + + ret = iaa_aecs_init_fixed(); + if (ret < 0) { + pr_debug("IAA fixed compression mode init failed\n"); + goto err_aecs_init; + } + + ret = idxd_driver_register(&iaa_crypto_driver); + if (ret) { + pr_debug("IAA wq sub-driver registration failed\n"); + goto err_driver_reg; + } + + ret = driver_create_file(&iaa_crypto_driver.drv, + &driver_attr_verify_compress); + if (ret) { + pr_debug("IAA verify_compress attr creation failed\n"); + goto err_verify_attr_create; + } + + ret = driver_create_file(&iaa_crypto_driver.drv, + &driver_attr_sync_mode); + if (ret) { + pr_debug("IAA sync mode attr creation failed\n"); + goto err_sync_attr_create; + } + + if (iaa_crypto_debugfs_init()) + pr_warn("debugfs init failed, stats not available\n"); + + pr_debug("initialized\n"); +out: + return ret; + +err_sync_attr_create: + 
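/* + * Error unwind: release everything set up so far in reverse + * order; each label below undoes one successful init step. + */ +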
driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_verify_compress); +err_verify_attr_create: + idxd_driver_unregister(&iaa_crypto_driver); +err_driver_reg: + iaa_aecs_cleanup_fixed(); +err_aecs_init: + crypto_free_comp(deflate_generic_tfm); + + goto out; +} + +static void __exit iaa_crypto_cleanup_module(void) +{ + if (iaa_unregister_compression_device()) + pr_debug("IAA compression device unregister failed\n"); + + iaa_crypto_debugfs_cleanup(); + driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_sync_mode); + driver_remove_file(&iaa_crypto_driver.drv, + &driver_attr_verify_compress); + idxd_driver_unregister(&iaa_crypto_driver); + iaa_aecs_cleanup_fixed(); + crypto_free_comp(deflate_generic_tfm); + + pr_debug("cleaned up\n"); +} + +MODULE_IMPORT_NS(IDXD); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_IDXD_DEVICE(0); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("IAA Compression Accelerator Crypto Driver"); + +module_init(iaa_crypto_init_module); +module_exit(iaa_crypto_cleanup_module); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c new file mode 100644 index 0000000000..2e3b7b73af --- /dev/null +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/highmem.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/smp.h> +#include <uapi/linux/idxd.h> +#include <linux/idxd.h> +#include <linux/dmaengine.h> +#include "../../dma/idxd/idxd.h" +#include <linux/debugfs.h> +#include <crypto/internal/acompress.h> +#include "iaa_crypto.h" +#include "iaa_crypto_stats.h" + +static u64 total_comp_calls; +static u64 total_decomp_calls; +static u64 total_sw_decomp_calls; +static u64 max_comp_delay_ns; +static u64 max_decomp_delay_ns; +static u64 max_acomp_delay_ns; +static u64 max_adecomp_delay_ns; +static u64 total_comp_bytes_out; +static u64 total_decomp_bytes_in; +static u64 total_completion_einval_errors; +static u64 total_completion_timeout_errors; +static u64 total_completion_comp_buf_overflow_errors; + +static struct dentry *iaa_crypto_debugfs_root; + +void update_total_comp_calls(void) +{ + total_comp_calls++; +} + +void update_total_comp_bytes_out(int n) +{ + total_comp_bytes_out += n; +} + +void update_total_decomp_calls(void) +{ + total_decomp_calls++; +} + +void update_total_sw_decomp_calls(void) +{ + total_sw_decomp_calls++; +} + +void update_total_decomp_bytes_in(int n) +{ + total_decomp_bytes_in += n; +} + +void update_completion_einval_errs(void) +{ + total_completion_einval_errors++; +} + +void update_completion_timeout_errs(void) +{ + total_completion_timeout_errors++; +} + +void update_completion_comp_buf_overflow_errs(void) +{ + total_completion_comp_buf_overflow_errors++; +} + +void update_max_comp_delay_ns(u64 start_time_ns) +{ + u64 time_diff; + + time_diff = ktime_get_ns() - start_time_ns; + + if (time_diff > max_comp_delay_ns) + max_comp_delay_ns = time_diff; +} + +void update_max_decomp_delay_ns(u64 start_time_ns) +{ + u64 time_diff; + + time_diff = ktime_get_ns() - start_time_ns; + + if (time_diff > max_decomp_delay_ns) + max_decomp_delay_ns = time_diff; +} + +void update_max_acomp_delay_ns(u64 start_time_ns) +{ + u64 time_diff; + + time_diff = ktime_get_ns() - start_time_ns; + + if (time_diff > max_acomp_delay_ns) + max_acomp_delay_ns = time_diff; +} + +void 
update_max_adecomp_delay_ns(u64 start_time_ns) +{ + u64 time_diff; + + time_diff = ktime_get_ns() - start_time_ns; + + if (time_diff > max_adecomp_delay_ns) + max_adecomp_delay_ns = time_diff; +} + +void update_wq_comp_calls(struct idxd_wq *idxd_wq) +{ + struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); + + wq->comp_calls++; + wq->iaa_device->comp_calls++; +} + +void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) +{ + struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); + + wq->comp_bytes += n; + wq->iaa_device->comp_bytes += n; +} + +void update_wq_decomp_calls(struct idxd_wq *idxd_wq) +{ + struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); + + wq->decomp_calls++; + wq->iaa_device->decomp_calls++; +} + +void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) +{ + struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); + + wq->decomp_bytes += n; + wq->iaa_device->decomp_bytes += n; +} + +static void reset_iaa_crypto_stats(void) +{ + total_comp_calls = 0; + total_decomp_calls = 0; + total_sw_decomp_calls = 0; + max_comp_delay_ns = 0; + max_decomp_delay_ns = 0; + max_acomp_delay_ns = 0; + max_adecomp_delay_ns = 0; + total_comp_bytes_out = 0; + total_decomp_bytes_in = 0; + total_completion_einval_errors = 0; + total_completion_timeout_errors = 0; + total_completion_comp_buf_overflow_errors = 0; +} + +static void reset_wq_stats(struct iaa_wq *wq) +{ + wq->comp_calls = 0; + wq->comp_bytes = 0; + wq->decomp_calls = 0; + wq->decomp_bytes = 0; +} + +static void reset_device_stats(struct iaa_device *iaa_device) +{ + struct iaa_wq *iaa_wq; + + iaa_device->comp_calls = 0; + iaa_device->comp_bytes = 0; + iaa_device->decomp_calls = 0; + iaa_device->decomp_bytes = 0; + + list_for_each_entry(iaa_wq, &iaa_device->wqs, list) + reset_wq_stats(iaa_wq); +} + +static void wq_show(struct seq_file *m, struct iaa_wq *iaa_wq) +{ + seq_printf(m, " name: %s\n", iaa_wq->wq->name); + seq_printf(m, " comp_calls: %llu\n", iaa_wq->comp_calls); + seq_printf(m, " comp_bytes: %llu\n", iaa_wq->comp_bytes); + seq_printf(m, " decomp_calls: %llu\n", iaa_wq->decomp_calls); + seq_printf(m, " decomp_bytes: %llu\n\n", iaa_wq->decomp_bytes); +} + +static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) +{ + struct iaa_wq *iaa_wq; + + seq_puts(m, "iaa device:\n"); + seq_printf(m, " id: %d\n", iaa_device->idxd->id); + seq_printf(m, " n_wqs: %d\n", iaa_device->n_wq); + seq_printf(m, " comp_calls: %llu\n", iaa_device->comp_calls); + seq_printf(m, " comp_bytes: %llu\n", iaa_device->comp_bytes); + seq_printf(m, " decomp_calls: %llu\n", iaa_device->decomp_calls); + seq_printf(m, " decomp_bytes: %llu\n", iaa_device->decomp_bytes); + seq_puts(m, " wqs:\n"); + + list_for_each_entry(iaa_wq, &iaa_device->wqs, list) + wq_show(m, iaa_wq); +} + +static void global_stats_show(struct seq_file *m) +{ + seq_puts(m, "global stats:\n"); + seq_printf(m, " total_comp_calls: %llu\n", total_comp_calls); + seq_printf(m, " total_decomp_calls: %llu\n", total_decomp_calls); + seq_printf(m, " total_sw_decomp_calls: %llu\n", total_sw_decomp_calls); + seq_printf(m, " total_comp_bytes_out: %llu\n", total_comp_bytes_out); + seq_printf(m, " total_decomp_bytes_in: %llu\n", total_decomp_bytes_in); + seq_printf(m, " total_completion_einval_errors: %llu\n", + total_completion_einval_errors); + seq_printf(m, " total_completion_timeout_errors: %llu\n", + total_completion_timeout_errors); + seq_printf(m, " total_completion_comp_buf_overflow_errors: %llu\n\n", + total_completion_comp_buf_overflow_errors); +} + +static int wq_stats_show(struct 
seq_file *m, void *v) +{ + struct iaa_device *iaa_device; + + mutex_lock(&iaa_devices_lock); + + global_stats_show(m); + + list_for_each_entry(iaa_device, &iaa_devices, list) + device_stats_show(m, iaa_device); + + mutex_unlock(&iaa_devices_lock); + + return 0; +} + +static int iaa_crypto_stats_reset(void *data, u64 value) +{ + struct iaa_device *iaa_device; + + reset_iaa_crypto_stats(); + + mutex_lock(&iaa_devices_lock); + + list_for_each_entry(iaa_device, &iaa_devices, list) + reset_device_stats(iaa_device); + + mutex_unlock(&iaa_devices_lock); + + return 0; +} + +static int wq_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, wq_stats_show, file); +} + +static const struct file_operations wq_stats_fops = { + .open = wq_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +DEFINE_DEBUGFS_ATTRIBUTE(wq_stats_reset_fops, NULL, iaa_crypto_stats_reset, "%llu\n"); + +int __init iaa_crypto_debugfs_init(void) +{ + if (!debugfs_initialized()) + return -ENODEV; + + iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL); + if (!iaa_crypto_debugfs_root) + return -ENOMEM; + + debugfs_create_u64("max_comp_delay_ns", 0644, + iaa_crypto_debugfs_root, &max_comp_delay_ns); + debugfs_create_u64("max_decomp_delay_ns", 0644, + iaa_crypto_debugfs_root, &max_decomp_delay_ns); + debugfs_create_u64("max_acomp_delay_ns", 0644, + iaa_crypto_debugfs_root, &max_acomp_delay_ns); + debugfs_create_u64("max_adecomp_delay_ns", 0644, + iaa_crypto_debugfs_root, &max_adecomp_delay_ns); + debugfs_create_u64("total_comp_calls", 0644, + iaa_crypto_debugfs_root, &total_comp_calls); + debugfs_create_u64("total_decomp_calls", 0644, + iaa_crypto_debugfs_root, &total_decomp_calls); + debugfs_create_u64("total_sw_decomp_calls", 0644, + iaa_crypto_debugfs_root, &total_sw_decomp_calls); + debugfs_create_u64("total_comp_bytes_out", 0644, + iaa_crypto_debugfs_root, &total_comp_bytes_out); + debugfs_create_u64("total_decomp_bytes_in", 0644, + iaa_crypto_debugfs_root, &total_decomp_bytes_in); + debugfs_create_file("wq_stats", 0644, iaa_crypto_debugfs_root, NULL, + &wq_stats_fops); + debugfs_create_file("stats_reset", 0644, iaa_crypto_debugfs_root, NULL, + &wq_stats_reset_fops); + + return 0; +} + +void __exit iaa_crypto_debugfs_cleanup(void) +{ + debugfs_remove_recursive(iaa_crypto_debugfs_root); +} + +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h new file mode 100644 index 0000000000..c10b87b86f --- /dev/null +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 2021 Intel Corporation. All rights rsvd. 
*/ + +#ifndef __CRYPTO_DEV_IAA_CRYPTO_STATS_H__ +#define __CRYPTO_DEV_IAA_CRYPTO_STATS_H__ + +#if defined(CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS) +int iaa_crypto_debugfs_init(void); +void iaa_crypto_debugfs_cleanup(void); + +void update_total_comp_calls(void); +void update_total_comp_bytes_out(int n); +void update_total_decomp_calls(void); +void update_total_sw_decomp_calls(void); +void update_total_decomp_bytes_in(int n); +void update_max_comp_delay_ns(u64 start_time_ns); +void update_max_decomp_delay_ns(u64 start_time_ns); +void update_max_acomp_delay_ns(u64 start_time_ns); +void update_max_adecomp_delay_ns(u64 start_time_ns); +void update_completion_einval_errs(void); +void update_completion_timeout_errs(void); +void update_completion_comp_buf_overflow_errs(void); + +void update_wq_comp_calls(struct idxd_wq *idxd_wq); +void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n); +void update_wq_decomp_calls(struct idxd_wq *idxd_wq); +void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n); + +#else +static inline int iaa_crypto_debugfs_init(void) { return 0; } +static inline void iaa_crypto_debugfs_cleanup(void) {} + +static inline void update_total_comp_calls(void) {} +static inline void update_total_comp_bytes_out(int n) {} +static inline void update_total_decomp_calls(void) {} +static inline void update_total_sw_decomp_calls(void) {} +static inline void update_total_decomp_bytes_in(int n) {} +static inline void update_max_comp_delay_ns(u64 start_time_ns) {} +static inline void update_max_decomp_delay_ns(u64 start_time_ns) {} +static inline void update_max_acomp_delay_ns(u64 start_time_ns) {} +static inline void update_max_adecomp_delay_ns(u64 start_time_ns) {} +static inline void update_completion_einval_errs(void) {} +static inline void update_completion_timeout_errs(void) {} +static inline void update_completion_comp_buf_overflow_errs(void) {} + +static inline void update_wq_comp_calls(struct idxd_wq *idxd_wq) {} +static inline void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) {} +static inline void update_wq_decomp_calls(struct idxd_wq *idxd_wq) {} +static inline void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) {} + +#endif // CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS + +#endif diff --git a/drivers/crypto/intel/qat/Kconfig b/drivers/crypto/intel/qat/Kconfig index 1220cc86f9..c120f6715a 100644 --- a/drivers/crypto/intel/qat/Kconfig +++ b/drivers/crypto/intel/qat/Kconfig @@ -59,6 +59,17 @@ config CRYPTO_DEV_QAT_4XXX To compile this as a module, choose M here: the module will be called qat_4xxx. +config CRYPTO_DEV_QAT_420XX + tristate "Support for Intel(R) QAT_420XX" + depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST) + select CRYPTO_DEV_QAT + help + Support for Intel(R) QuickAssist Technology QAT_420xx + for accelerating crypto and compression workloads. + + To compile this as a module, choose M here: the module + will be called qat_420xx. 
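The iaa_crypto_stats.h header above shows the standard kernel pattern for optional instrumentation: when CONFIG_CRYPTO_DEV_IAA_CRYPTO_STATS is disabled, each update_*() hook becomes an empty static inline, so callers in iaa_crypto_main.c need no #ifdefs and disabled builds carry no overhead. A minimal sketch of the same pattern, using hypothetical names:

    /* foo_stats.h -- illustrative sketch, not part of this patch */
    #ifdef CONFIG_FOO_STATS
    void foo_count_call(void);                   /* real counter lives in foo_stats.c */
    #else
    static inline void foo_count_call(void) {}   /* optimized away when stats are off */
    #endif

A call site can then invoke foo_count_call() unconditionally; with the option off, the empty inline is discarded by the compiler.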
+ config CRYPTO_DEV_QAT_DH895xCCVF tristate "Support for Intel(R) DH895xCC Virtual Function" depends on PCI && (!CPU_BIG_ENDIAN || COMPILE_TEST) diff --git a/drivers/crypto/intel/qat/Makefile b/drivers/crypto/intel/qat/Makefile index 258c8a626c..235b69f4f3 100644 --- a/drivers/crypto/intel/qat/Makefile +++ b/drivers/crypto/intel/qat/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCC) += qat_dh895xcc/ obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXX) += qat_c3xxx/ obj-$(CONFIG_CRYPTO_DEV_QAT_C62X) += qat_c62x/ obj-$(CONFIG_CRYPTO_DEV_QAT_4XXX) += qat_4xxx/ +obj-$(CONFIG_CRYPTO_DEV_QAT_420XX) += qat_420xx/ obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCCVF) += qat_dh895xccvf/ obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXXVF) += qat_c3xxxvf/ obj-$(CONFIG_CRYPTO_DEV_QAT_C62XVF) += qat_c62xvf/ diff --git a/drivers/crypto/intel/qat/qat_420xx/Makefile b/drivers/crypto/intel/qat/qat_420xx/Makefile new file mode 100644 index 0000000000..a90fbe00b3 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_420xx/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +ccflags-y := -I $(srctree)/$(src)/../qat_common +obj-$(CONFIG_CRYPTO_DEV_QAT_420XX) += qat_420xx.o +qat_420xx-objs := adf_drv.o adf_420xx_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c new file mode 100644 index 0000000000..7909b51e97 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation */ +#include <linux/iopoll.h> +#include <adf_accel_devices.h> +#include <adf_admin.h> +#include <adf_cfg.h> +#include <adf_cfg_services.h> +#include <adf_clock.h> +#include <adf_common_drv.h> +#include <adf_fw_config.h> +#include <adf_gen4_config.h> +#include <adf_gen4_dc.h> +#include <adf_gen4_hw_data.h> +#include <adf_gen4_pfvf.h> +#include <adf_gen4_pm.h> +#include <adf_gen4_ras.h> +#include <adf_gen4_timer.h> +#include <adf_gen4_tl.h> +#include "adf_420xx_hw_data.h" +#include "icp_qat_hw.h" + +#define ADF_AE_GROUP_0 GENMASK(3, 0) +#define ADF_AE_GROUP_1 GENMASK(7, 4) +#define ADF_AE_GROUP_2 GENMASK(11, 8) +#define ADF_AE_GROUP_3 GENMASK(15, 12) +#define ADF_AE_GROUP_4 BIT(16) + +#define ENA_THD_MASK_ASYM GENMASK(1, 0) +#define ENA_THD_MASK_SYM GENMASK(3, 0) +#define ENA_THD_MASK_DC GENMASK(1, 0) + +static const char * const adf_420xx_fw_objs[] = { + [ADF_FW_SYM_OBJ] = ADF_420XX_SYM_OBJ, + [ADF_FW_ASYM_OBJ] = ADF_420XX_ASYM_OBJ, + [ADF_FW_DC_OBJ] = ADF_420XX_DC_OBJ, + [ADF_FW_ADMIN_OBJ] = ADF_420XX_ADMIN_OBJ, +}; + +static const struct adf_fw_config adf_fw_cy_config[] = { + {ADF_AE_GROUP_3, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_2, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_1, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_0, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_4, ADF_FW_ADMIN_OBJ}, +}; + +static const struct adf_fw_config adf_fw_dc_config[] = { + {ADF_AE_GROUP_1, ADF_FW_DC_OBJ}, + {ADF_AE_GROUP_0, ADF_FW_DC_OBJ}, + {ADF_AE_GROUP_4, ADF_FW_ADMIN_OBJ}, +}; + +static const struct adf_fw_config adf_fw_sym_config[] = { + {ADF_AE_GROUP_3, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_2, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_1, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_0, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_4, ADF_FW_ADMIN_OBJ}, +}; + +static const struct adf_fw_config adf_fw_asym_config[] = { + {ADF_AE_GROUP_3, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_2, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_1, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_0, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_4, ADF_FW_ADMIN_OBJ}, +}; + +static const struct adf_fw_config 
adf_fw_asym_dc_config[] = { + {ADF_AE_GROUP_3, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_2, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_1, ADF_FW_ASYM_OBJ}, + {ADF_AE_GROUP_0, ADF_FW_DC_OBJ}, + {ADF_AE_GROUP_4, ADF_FW_ADMIN_OBJ}, +}; + +static const struct adf_fw_config adf_fw_sym_dc_config[] = { + {ADF_AE_GROUP_2, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_1, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_0, ADF_FW_DC_OBJ}, + {ADF_AE_GROUP_4, ADF_FW_ADMIN_OBJ}, +}; + +static const struct adf_fw_config adf_fw_dcc_config[] = { + {ADF_AE_GROUP_1, ADF_FW_DC_OBJ}, + {ADF_AE_GROUP_0, ADF_FW_SYM_OBJ}, + {ADF_AE_GROUP_4, ADF_FW_ADMIN_OBJ}, +}; + + +static struct adf_hw_device_class adf_420xx_class = { + .name = ADF_420XX_DEVICE_NAME, + .type = DEV_420XX, + .instances = 0, +}; + +static u32 get_ae_mask(struct adf_hw_device_data *self) +{ + u32 me_disable = self->fuses; + + return ~me_disable & ADF_420XX_ACCELENGINES_MASK; +} + +static u32 uof_get_num_objs(struct adf_accel_dev *accel_dev) +{ + switch (adf_get_service_enabled(accel_dev)) { + case SVC_CY: + case SVC_CY2: + return ARRAY_SIZE(adf_fw_cy_config); + case SVC_DC: + return ARRAY_SIZE(adf_fw_dc_config); + case SVC_DCC: + return ARRAY_SIZE(adf_fw_dcc_config); + case SVC_SYM: + return ARRAY_SIZE(adf_fw_sym_config); + case SVC_ASYM: + return ARRAY_SIZE(adf_fw_asym_config); + case SVC_ASYM_DC: + case SVC_DC_ASYM: + return ARRAY_SIZE(adf_fw_asym_dc_config); + case SVC_SYM_DC: + case SVC_DC_SYM: + return ARRAY_SIZE(adf_fw_sym_dc_config); + default: + return 0; + } +} + +static const struct adf_fw_config *get_fw_config(struct adf_accel_dev *accel_dev) +{ + switch (adf_get_service_enabled(accel_dev)) { + case SVC_CY: + case SVC_CY2: + return adf_fw_cy_config; + case SVC_DC: + return adf_fw_dc_config; + case SVC_DCC: + return adf_fw_dcc_config; + case SVC_SYM: + return adf_fw_sym_config; + case SVC_ASYM: + return adf_fw_asym_config; + case SVC_ASYM_DC: + case SVC_DC_ASYM: + return adf_fw_asym_dc_config; + case SVC_SYM_DC: + case SVC_DC_SYM: + return adf_fw_sym_dc_config; + default: + return NULL; + } +} + +static void update_ae_mask(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + const struct adf_fw_config *fw_config; + u32 config_ae_mask = 0; + u32 ae_mask, num_objs; + int i; + + ae_mask = get_ae_mask(hw_data); + + /* Modify the AE mask based on the firmware configuration loaded */ + fw_config = get_fw_config(accel_dev); + num_objs = uof_get_num_objs(accel_dev); + + config_ae_mask |= ADF_420XX_ADMIN_AE_MASK; + for (i = 0; i < num_objs; i++) + config_ae_mask |= fw_config[i].ae_mask; + + hw_data->ae_mask = ae_mask & config_ae_mask; +} + +static u32 get_accel_cap(struct adf_accel_dev *accel_dev) +{ + u32 capabilities_sym, capabilities_asym, capabilities_dc; + struct pci_dev *pdev = accel_dev->accel_pci_dev.pci_dev; + u32 capabilities_dcc; + u32 fusectl1; + + /* As a side effect, update ae_mask based on configuration */ + update_ae_mask(accel_dev); + + /* Read accelerator capabilities mask */ + pci_read_config_dword(pdev, ADF_GEN4_FUSECTL1_OFFSET, &fusectl1); + + capabilities_sym = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC | + ICP_ACCEL_CAPABILITIES_CIPHER | + ICP_ACCEL_CAPABILITIES_AUTHENTICATION | + ICP_ACCEL_CAPABILITIES_SHA3 | + ICP_ACCEL_CAPABILITIES_SHA3_EXT | + ICP_ACCEL_CAPABILITIES_HKDF | + ICP_ACCEL_CAPABILITIES_CHACHA_POLY | + ICP_ACCEL_CAPABILITIES_AESGCM_SPC | + ICP_ACCEL_CAPABILITIES_SM3 | + ICP_ACCEL_CAPABILITIES_SM4 | + ICP_ACCEL_CAPABILITIES_AES_V2 | + ICP_ACCEL_CAPABILITIES_ZUC | + ICP_ACCEL_CAPABILITIES_ZUC_256 | + 
ICP_ACCEL_CAPABILITIES_WIRELESS_CRYPTO_EXT | + ICP_ACCEL_CAPABILITIES_EXT_ALGCHAIN; + + /* A set bit in fusectl1 means the feature is OFF in this SKU */ + if (fusectl1 & ICP_ACCEL_GEN4_MASK_CIPHER_SLICE) { + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_HKDF; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER; + } + + if (fusectl1 & ICP_ACCEL_GEN4_MASK_UCS_SLICE) { + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CHACHA_POLY; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AESGCM_SPC; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AES_V2; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER; + } + + if (fusectl1 & ICP_ACCEL_GEN4_MASK_AUTH_SLICE) { + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SHA3; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SHA3_EXT; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER; + } + + if (fusectl1 & ICP_ACCEL_GEN4_MASK_SMX_SLICE) { + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SM3; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SM4; + } + + if (fusectl1 & ICP_ACCEL_GEN4_MASK_WCP_WAT_SLICE) { + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC_256; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_WIRELESS_CRYPTO_EXT; + } + + if (fusectl1 & ICP_ACCEL_GEN4_MASK_EIA3_SLICE) { + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC; + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC_256; + } + + if (fusectl1 & ICP_ACCEL_GEN4_MASK_ZUC_256_SLICE) + capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_ZUC_256; + + capabilities_asym = ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC | + ICP_ACCEL_CAPABILITIES_SM2 | + ICP_ACCEL_CAPABILITIES_ECEDMONT; + + if (fusectl1 & ICP_ACCEL_GEN4_MASK_PKE_SLICE) { + capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC; + capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_SM2; + capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_ECEDMONT; + } + + capabilities_dc = ICP_ACCEL_CAPABILITIES_COMPRESSION | + ICP_ACCEL_CAPABILITIES_LZ4_COMPRESSION | + ICP_ACCEL_CAPABILITIES_LZ4S_COMPRESSION | + ICP_ACCEL_CAPABILITIES_CNV_INTEGRITY64; + + if (fusectl1 & ICP_ACCEL_GEN4_MASK_COMPRESS_SLICE) { + capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION; + capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_LZ4_COMPRESSION; + capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_LZ4S_COMPRESSION; + capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_CNV_INTEGRITY64; + } + + switch (adf_get_service_enabled(accel_dev)) { + case SVC_CY: + case SVC_CY2: + return capabilities_sym | capabilities_asym; + case SVC_DC: + return capabilities_dc; + case SVC_DCC: + /* + * Sym capabilities are available for chaining operations, + * but sym crypto instances cannot be supported + */ + capabilities_dcc = capabilities_dc | capabilities_sym; + capabilities_dcc &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC; + return capabilities_dcc; + case SVC_SYM: + return capabilities_sym; + case SVC_ASYM: + return capabilities_asym; + case SVC_ASYM_DC: + case SVC_DC_ASYM: + return capabilities_asym | capabilities_dc; + case SVC_SYM_DC: + case SVC_DC_SYM: + return capabilities_sym | capabilities_dc; + default: + return 0; + } +} + +static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev) +{ + if (adf_gen4_init_thd2arb_map(accel_dev)) + dev_warn(&GET_DEV(accel_dev), + "Generate of the thread to arbiter map failed"); + + return GET_HW_DATA(accel_dev)->thd_to_arb_map; +} + +static void adf_init_rl_data(struct adf_rl_hw_data *rl_data) +{ + 
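/* + * Token-bucket CSR offsets below are common GEN4 layout; the scale + * factors and per-service max throughput values are 420xx tuning. + */ +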
rl_data->pciout_tb_offset = ADF_GEN4_RL_TOKEN_PCIEOUT_BUCKET_OFFSET; + rl_data->pciin_tb_offset = ADF_GEN4_RL_TOKEN_PCIEIN_BUCKET_OFFSET; + rl_data->r2l_offset = ADF_GEN4_RL_R2L_OFFSET; + rl_data->l2c_offset = ADF_GEN4_RL_L2C_OFFSET; + rl_data->c2s_offset = ADF_GEN4_RL_C2S_OFFSET; + + rl_data->pcie_scale_div = ADF_420XX_RL_PCIE_SCALE_FACTOR_DIV; + rl_data->pcie_scale_mul = ADF_420XX_RL_PCIE_SCALE_FACTOR_MUL; + rl_data->dcpr_correction = ADF_420XX_RL_DCPR_CORRECTION; + rl_data->max_tp[ADF_SVC_ASYM] = ADF_420XX_RL_MAX_TP_ASYM; + rl_data->max_tp[ADF_SVC_SYM] = ADF_420XX_RL_MAX_TP_SYM; + rl_data->max_tp[ADF_SVC_DC] = ADF_420XX_RL_MAX_TP_DC; + rl_data->scan_interval = ADF_420XX_RL_SCANS_PER_SEC; + rl_data->scale_ref = ADF_420XX_RL_SLICE_REF; +} + +static int get_rp_group(struct adf_accel_dev *accel_dev, u32 ae_mask) +{ + switch (ae_mask) { + case ADF_AE_GROUP_0: + return RP_GROUP_0; + case ADF_AE_GROUP_1: + case ADF_AE_GROUP_3: + return RP_GROUP_1; + case ADF_AE_GROUP_2: + if (get_fw_config(accel_dev) == adf_fw_cy_config) + return RP_GROUP_0; + else + return RP_GROUP_1; + default: + dev_dbg(&GET_DEV(accel_dev), "ae_mask not recognized"); + return -EINVAL; + } +} + +static u32 get_ena_thd_mask(struct adf_accel_dev *accel_dev, u32 obj_num) +{ + const struct adf_fw_config *fw_config; + + if (obj_num >= uof_get_num_objs(accel_dev)) + return ADF_GEN4_ENA_THD_MASK_ERROR; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return ADF_GEN4_ENA_THD_MASK_ERROR; + + switch (fw_config[obj_num].obj) { + case ADF_FW_ASYM_OBJ: + return ENA_THD_MASK_ASYM; + case ADF_FW_SYM_OBJ: + return ENA_THD_MASK_SYM; + case ADF_FW_DC_OBJ: + return ENA_THD_MASK_DC; + default: + return ADF_GEN4_ENA_THD_MASK_ERROR; + } +} + +static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev) +{ + enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { }; + const struct adf_fw_config *fw_config; + u16 ring_to_svc_map; + int i, j; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return 0; + + /* If dcc, all rings handle compression requests */ + if (adf_get_service_enabled(accel_dev) == SVC_DCC) { + for (i = 0; i < RP_GROUP_COUNT; i++) + rps[i] = COMP; + goto set_mask; + } + + for (i = 0; i < RP_GROUP_COUNT; i++) { + switch (fw_config[i].ae_mask) { + case ADF_AE_GROUP_0: + j = RP_GROUP_0; + break; + case ADF_AE_GROUP_1: + j = RP_GROUP_1; + break; + default: + return 0; + } + + switch (fw_config[i].obj) { + case ADF_FW_SYM_OBJ: + rps[j] = SYM; + break; + case ADF_FW_ASYM_OBJ: + rps[j] = ASYM; + break; + case ADF_FW_DC_OBJ: + rps[j] = COMP; + break; + default: + rps[j] = 0; + break; + } + } + +set_mask: + ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT | + rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT | + rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT | + rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT; + + return ring_to_svc_map; +} + +static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num, + const char * const fw_objs[], int num_objs) +{ + const struct adf_fw_config *fw_config; + int id; + + fw_config = get_fw_config(accel_dev); + if (fw_config) + id = fw_config[obj_num].obj; + else + id = -EINVAL; + + if (id < 0 || id > num_objs) + return NULL; + + return fw_objs[id]; +} + +static const char *uof_get_name_420xx(struct adf_accel_dev *accel_dev, u32 obj_num) +{ + int num_fw_objs = ARRAY_SIZE(adf_420xx_fw_objs); + + return uof_get_name(accel_dev, obj_num, adf_420xx_fw_objs, num_fw_objs); +} + +static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num) 
+{ + const struct adf_fw_config *fw_config; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return 0; + + return fw_config[obj_num].ae_mask; +} + +static void adf_gen4_set_err_mask(struct adf_dev_err_mask *dev_err_mask) +{ + dev_err_mask->cppagentcmdpar_mask = ADF_420XX_HICPPAGENTCMDPARERRLOG_MASK; + dev_err_mask->parerr_ath_cph_mask = ADF_420XX_PARITYERRORMASK_ATH_CPH_MASK; + dev_err_mask->parerr_cpr_xlt_mask = ADF_420XX_PARITYERRORMASK_CPR_XLT_MASK; + dev_err_mask->parerr_dcpr_ucs_mask = ADF_420XX_PARITYERRORMASK_DCPR_UCS_MASK; + dev_err_mask->parerr_pke_mask = ADF_420XX_PARITYERRORMASK_PKE_MASK; + dev_err_mask->ssmfeatren_mask = ADF_420XX_SSMFEATREN_MASK; +} + +void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id) +{ + hw_data->dev_class = &adf_420xx_class; + hw_data->instance_id = adf_420xx_class.instances++; + hw_data->num_banks = ADF_GEN4_ETR_MAX_BANKS; + hw_data->num_banks_per_vf = ADF_GEN4_NUM_BANKS_PER_VF; + hw_data->num_rings_per_bank = ADF_GEN4_NUM_RINGS_PER_BANK; + hw_data->num_accel = ADF_GEN4_MAX_ACCELERATORS; + hw_data->num_engines = ADF_420XX_MAX_ACCELENGINES; + hw_data->num_logical_accel = 1; + hw_data->tx_rx_gap = ADF_GEN4_RX_RINGS_OFFSET; + hw_data->tx_rings_mask = ADF_GEN4_TX_RINGS_MASK; + hw_data->ring_to_svc_map = ADF_GEN4_DEFAULT_RING_TO_SRV_MAP; + hw_data->alloc_irq = adf_isr_resource_alloc; + hw_data->free_irq = adf_isr_resource_free; + hw_data->enable_error_correction = adf_gen4_enable_error_correction; + hw_data->get_accel_mask = adf_gen4_get_accel_mask; + hw_data->get_ae_mask = get_ae_mask; + hw_data->get_num_accels = adf_gen4_get_num_accels; + hw_data->get_num_aes = adf_gen4_get_num_aes; + hw_data->get_sram_bar_id = adf_gen4_get_sram_bar_id; + hw_data->get_etr_bar_id = adf_gen4_get_etr_bar_id; + hw_data->get_misc_bar_id = adf_gen4_get_misc_bar_id; + hw_data->get_arb_info = adf_gen4_get_arb_info; + hw_data->get_admin_info = adf_gen4_get_admin_info; + hw_data->get_accel_cap = get_accel_cap; + hw_data->get_sku = adf_gen4_get_sku; + hw_data->init_admin_comms = adf_init_admin_comms; + hw_data->exit_admin_comms = adf_exit_admin_comms; + hw_data->send_admin_init = adf_send_admin_init; + hw_data->init_arb = adf_init_arb; + hw_data->exit_arb = adf_exit_arb; + hw_data->get_arb_mapping = adf_get_arbiter_mapping; + hw_data->enable_ints = adf_gen4_enable_ints; + hw_data->init_device = adf_gen4_init_device; + hw_data->reset_device = adf_reset_flr; + hw_data->admin_ae_mask = ADF_420XX_ADMIN_AE_MASK; + hw_data->num_rps = ADF_GEN4_MAX_RPS; + hw_data->fw_name = ADF_420XX_FW; + hw_data->fw_mmp_name = ADF_420XX_MMP; + hw_data->uof_get_name = uof_get_name_420xx; + hw_data->uof_get_num_objs = uof_get_num_objs; + hw_data->uof_get_ae_mask = uof_get_ae_mask; + hw_data->get_rp_group = get_rp_group; + hw_data->get_ena_thd_mask = get_ena_thd_mask; + hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable; + hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; + hw_data->get_ring_to_svc_map = get_ring_to_svc_map; + hw_data->disable_iov = adf_disable_sriov; + hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; + hw_data->enable_pm = adf_gen4_enable_pm; + hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt; + hw_data->dev_config = adf_gen4_dev_config; + hw_data->start_timer = adf_gen4_timer_start; + hw_data->stop_timer = adf_gen4_timer_stop; + hw_data->get_hb_clock = adf_gen4_get_heartbeat_clock; + hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE; + hw_data->clock_frequency = ADF_420XX_AE_FREQ; + + adf_gen4_set_err_mask(&hw_data->dev_err_mask); 
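+ /* + * Wire up the ops bundles shared by all GEN4 devices (CSR access, + * PF/VF comms, compression, RAS, telemetry) and the 420xx RL data. + */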
+ adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops); + adf_gen4_init_dc_ops(&hw_data->dc_ops); + adf_gen4_init_ras_ops(&hw_data->ras_ops); + adf_gen4_init_tl_data(&hw_data->tl_data); + adf_init_rl_data(&hw_data->rl_data); +} + +void adf_clean_hw_data_420xx(struct adf_hw_device_data *hw_data) +{ + hw_data->dev_class->instances--; +} diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.h b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.h new file mode 100644 index 0000000000..99abbfc148 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation */ +#ifndef ADF_420XX_HW_DATA_H_ +#define ADF_420XX_HW_DATA_H_ + +#include <adf_accel_devices.h> + +#define ADF_420XX_MAX_ACCELENGINES 17 + +#define ADF_420XX_ACCELENGINES_MASK 0x1FFFF +#define ADF_420XX_ADMIN_AE_MASK 0x10000 + +#define ADF_420XX_HICPPAGENTCMDPARERRLOG_MASK (0xFF) +#define ADF_420XX_PARITYERRORMASK_ATH_CPH_MASK (0xFF00FF) +#define ADF_420XX_PARITYERRORMASK_CPR_XLT_MASK (0x10001) +#define ADF_420XX_PARITYERRORMASK_DCPR_UCS_MASK (0xF0007) +#define ADF_420XX_PARITYERRORMASK_PKE_MASK (0xFFF) +#define ADF_420XX_PARITYERRORMASK_WAT_WCP_MASK (0x3FF03FF) + +/* + * SSMFEATREN bit mask + * BIT(4) - enables parity detection on CPP + * BIT(12) - enables the logging of push/pull data errors + * in pperr register + * BIT(16) - BIT(27) - enable parity detection on SPPs + */ +#define ADF_420XX_SSMFEATREN_MASK \ + (BIT(4) | BIT(12) | BIT(16) | BIT(17) | BIT(18) | BIT(19) | BIT(20) | \ + BIT(21) | BIT(22) | BIT(23) | BIT(24) | BIT(25) | BIT(26) | BIT(27)) + +/* Firmware Binaries */ +#define ADF_420XX_FW "qat_420xx.bin" +#define ADF_420XX_MMP "qat_420xx_mmp.bin" +#define ADF_420XX_SYM_OBJ "qat_420xx_sym.bin" +#define ADF_420XX_DC_OBJ "qat_420xx_dc.bin" +#define ADF_420XX_ASYM_OBJ "qat_420xx_asym.bin" +#define ADF_420XX_ADMIN_OBJ "qat_420xx_admin.bin" + +/* RL constants */ +#define ADF_420XX_RL_PCIE_SCALE_FACTOR_DIV 100 +#define ADF_420XX_RL_PCIE_SCALE_FACTOR_MUL 102 +#define ADF_420XX_RL_DCPR_CORRECTION 1 +#define ADF_420XX_RL_SCANS_PER_SEC 954 +#define ADF_420XX_RL_MAX_TP_ASYM 173750UL +#define ADF_420XX_RL_MAX_TP_SYM 95000UL +#define ADF_420XX_RL_MAX_TP_DC 40000UL +#define ADF_420XX_RL_SLICE_REF 1000UL + +/* Clocks frequency */ +#define ADF_420XX_AE_FREQ (1000 * HZ_PER_MHZ) + +void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id); +void adf_clean_hw_data_420xx(struct adf_hw_device_data *hw_data); + +#endif diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_drv.c b/drivers/crypto/intel/qat/qat_420xx/adf_drv.c new file mode 100644 index 0000000000..2a3598409e --- /dev/null +++ b/drivers/crypto/intel/qat/qat_420xx/adf_drv.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation */ +#include <linux/device.h> +#include <linux/module.h> +#include <linux/pci.h> + +#include <adf_accel_devices.h> +#include <adf_gen4_hw_data.h> +#include <adf_gen4_config.h> +#include <adf_cfg.h> +#include <adf_common_drv.h> +#include <adf_dbgfs.h> + +#include "adf_420xx_hw_data.h" + +static const struct pci_device_id adf_pci_tbl[] = { + { PCI_VDEVICE(INTEL, ADF_420XX_PCI_DEVICE_ID), }, + { } +}; +MODULE_DEVICE_TABLE(pci, adf_pci_tbl); + +static void adf_cleanup_accel(struct adf_accel_dev *accel_dev) +{ + if (accel_dev->hw_device) { + adf_clean_hw_data_420xx(accel_dev->hw_device); + accel_dev->hw_device = NULL; + } + 
adf_dbgfs_exit(accel_dev); + adf_cfg_dev_remove(accel_dev); + adf_devmgr_rm_dev(accel_dev, NULL); +} + +static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct adf_accel_dev *accel_dev; + struct adf_accel_pci *accel_pci_dev; + struct adf_hw_device_data *hw_data; + unsigned int i, bar_nr; + unsigned long bar_mask; + struct adf_bar *bar; + int ret; + + if (num_possible_nodes() > 1 && dev_to_node(&pdev->dev) < 0) { + /* + * If the accelerator is connected to a node with no memory + * there is no point in using the accelerator since the remote + * memory transaction will be very slow. + */ + dev_err(&pdev->dev, "Invalid NUMA configuration.\n"); + return -EINVAL; + } + + accel_dev = devm_kzalloc(&pdev->dev, sizeof(*accel_dev), GFP_KERNEL); + if (!accel_dev) + return -ENOMEM; + + INIT_LIST_HEAD(&accel_dev->crypto_list); + accel_pci_dev = &accel_dev->accel_pci_dev; + accel_pci_dev->pci_dev = pdev; + + /* + * Add accel device to accel table + * This should be called before adf_cleanup_accel is called + */ + if (adf_devmgr_add_dev(accel_dev, NULL)) { + dev_err(&pdev->dev, "Failed to add new accelerator device.\n"); + return -EFAULT; + } + + accel_dev->owner = THIS_MODULE; + /* Allocate and initialise device hardware meta-data structure */ + hw_data = devm_kzalloc(&pdev->dev, sizeof(*hw_data), GFP_KERNEL); + if (!hw_data) { + ret = -ENOMEM; + goto out_err; + } + + accel_dev->hw_device = hw_data; + adf_init_hw_data_420xx(accel_dev->hw_device, ent->device); + + pci_read_config_byte(pdev, PCI_REVISION_ID, &accel_pci_dev->revid); + pci_read_config_dword(pdev, ADF_GEN4_FUSECTL4_OFFSET, &hw_data->fuses); + + /* Get Accelerators and Accelerators Engines masks */ + hw_data->accel_mask = hw_data->get_accel_mask(hw_data); + hw_data->ae_mask = hw_data->get_ae_mask(hw_data); + accel_pci_dev->sku = hw_data->get_sku(hw_data); + /* If the device has no acceleration engines then ignore it */ + if (!hw_data->accel_mask || !hw_data->ae_mask || + (~hw_data->ae_mask & 0x01)) { + dev_err(&pdev->dev, "No acceleration units found.\n"); + ret = -EFAULT; + goto out_err; + } + + /* Create device configuration table */ + ret = adf_cfg_dev_add(accel_dev); + if (ret) + goto out_err; + + /* Enable PCI device */ + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(&pdev->dev, "Can't enable PCI device.\n"); + goto out_err; + } + + /* Set DMA identifier */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + dev_err(&pdev->dev, "No usable DMA configuration.\n"); + goto out_err; + } + + ret = adf_gen4_cfg_dev_init(accel_dev); + if (ret) { + dev_err(&pdev->dev, "Failed to initialize configuration.\n"); + goto out_err; + } + + /* Get accelerator capabilities mask */ + hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev); + if (!hw_data->accel_capabilities_mask) { + dev_err(&pdev->dev, "Failed to get capabilities mask.\n"); + ret = -EINVAL; + goto out_err; + } + + /* Find and map all the device's BARS */ + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM) & ADF_GEN4_BAR_MASK; + + ret = pcim_iomap_regions_request_all(pdev, bar_mask, pci_name(pdev)); + if (ret) { + dev_err(&pdev->dev, "Failed to map pci regions.\n"); + goto out_err; + } + + i = 0; + for_each_set_bit(bar_nr, &bar_mask, PCI_STD_NUM_BARS) { + bar = &accel_pci_dev->pci_bars[i++]; + bar->virt_addr = pcim_iomap_table(pdev)[bar_nr]; + } + + pci_set_master(pdev); + + if (pci_save_state(pdev)) { + dev_err(&pdev->dev, "Failed to save pci state.\n"); + ret = -ENOMEM; + goto out_err; + } + + 
accel_dev->ras_errors.enabled = true; + adf_dbgfs_init(accel_dev); + + ret = adf_dev_up(accel_dev, true); + if (ret) + goto out_err_dev_stop; + + ret = adf_sysfs_init(accel_dev); + if (ret) + goto out_err_dev_stop; + + return ret; + +out_err_dev_stop: + adf_dev_down(accel_dev, false); +out_err: + adf_cleanup_accel(accel_dev); + return ret; +} + +static void adf_remove(struct pci_dev *pdev) +{ + struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); + + if (!accel_dev) { + pr_err("QAT: Driver removal failed\n"); + return; + } + adf_dev_down(accel_dev, false); + adf_cleanup_accel(accel_dev); +} + +static struct pci_driver adf_driver = { + .id_table = adf_pci_tbl, + .name = ADF_420XX_DEVICE_NAME, + .probe = adf_probe, + .remove = adf_remove, + .sriov_configure = adf_sriov_configure, + .err_handler = &adf_err_handler, +}; + +module_pci_driver(adf_driver); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel"); +MODULE_FIRMWARE(ADF_420XX_FW); +MODULE_FIRMWARE(ADF_420XX_MMP); +MODULE_DESCRIPTION("Intel(R) QuickAssist Technology"); +MODULE_VERSION(ADF_DRV_VERSION); +MODULE_SOFTDEP("pre: crypto-intel_qat"); +MODULE_IMPORT_NS(CRYPTO_QAT); diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index b64aaecdd9..e171cddf6f 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -7,12 +7,15 @@ #include <adf_cfg_services.h> #include <adf_clock.h> #include <adf_common_drv.h> +#include <adf_fw_config.h> +#include <adf_gen4_config.h> #include <adf_gen4_dc.h> #include <adf_gen4_hw_data.h> #include <adf_gen4_pfvf.h> #include <adf_gen4_pm.h> #include "adf_gen4_ras.h" #include <adf_gen4_timer.h> +#include <adf_gen4_tl.h> #include "adf_4xxx_hw_data.h" #include "icp_qat_hw.h" @@ -20,12 +23,10 @@ #define ADF_AE_GROUP_1 GENMASK(7, 4) #define ADF_AE_GROUP_2 BIT(8) -enum adf_fw_objs { - ADF_FW_SYM_OBJ, - ADF_FW_ASYM_OBJ, - ADF_FW_DC_OBJ, - ADF_FW_ADMIN_OBJ, -}; +#define ENA_THD_MASK_ASYM GENMASK(1, 0) +#define ENA_THD_MASK_ASYM_401XX GENMASK(5, 0) +#define ENA_THD_MASK_SYM GENMASK(6, 0) +#define ENA_THD_MASK_DC GENMASK(1, 0) static const char * const adf_4xxx_fw_objs[] = { [ADF_FW_SYM_OBJ] = ADF_4XXX_SYM_OBJ, @@ -41,11 +42,6 @@ static const char * const adf_402xx_fw_objs[] = { [ADF_FW_ADMIN_OBJ] = ADF_402XX_ADMIN_OBJ, }; -struct adf_fw_config { - u32 ae_mask; - enum adf_fw_objs obj; -}; - static const struct adf_fw_config adf_fw_cy_config[] = { {ADF_AE_GROUP_1, ADF_FW_SYM_OBJ}, {ADF_AE_GROUP_0, ADF_FW_ASYM_OBJ}, @@ -95,36 +91,12 @@ static_assert(ARRAY_SIZE(adf_fw_cy_config) == ARRAY_SIZE(adf_fw_asym_dc_config)) static_assert(ARRAY_SIZE(adf_fw_cy_config) == ARRAY_SIZE(adf_fw_sym_dc_config)); static_assert(ARRAY_SIZE(adf_fw_cy_config) == ARRAY_SIZE(adf_fw_dcc_config)); -/* Worker thread to service arbiter mappings */ -static const u32 default_thrd_to_arb_map[ADF_4XXX_MAX_ACCELENGINES] = { - 0x5555555, 0x5555555, 0x5555555, 0x5555555, - 0xAAAAAAA, 0xAAAAAAA, 0xAAAAAAA, 0xAAAAAAA, - 0x0 -}; - -static const u32 thrd_to_arb_map_dc[ADF_4XXX_MAX_ACCELENGINES] = { - 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF, - 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF, - 0x0 -}; - -static const u32 thrd_to_arb_map_dcc[ADF_4XXX_MAX_ACCELENGINES] = { - 0x00000000, 0x00000000, 0x00000000, 0x00000000, - 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, - 0x0 -}; - static struct adf_hw_device_class adf_4xxx_class = { .name = ADF_4XXX_DEVICE_NAME, .type = DEV_4XXX, .instances = 0, }; -static 
u32 get_accel_mask(struct adf_hw_device_data *self) -{ - return ADF_4XXX_ACCELERATORS_MASK; -} - static u32 get_ae_mask(struct adf_hw_device_data *self) { u32 me_disable = self->fuses; @@ -132,55 +104,6 @@ static u32 get_ae_mask(struct adf_hw_device_data *self) return ~me_disable & ADF_4XXX_ACCELENGINES_MASK; } -static u32 get_num_accels(struct adf_hw_device_data *self) -{ - return ADF_4XXX_MAX_ACCELERATORS; -} - -static u32 get_num_aes(struct adf_hw_device_data *self) -{ - if (!self || !self->ae_mask) - return 0; - - return hweight32(self->ae_mask); -} - -static u32 get_misc_bar_id(struct adf_hw_device_data *self) -{ - return ADF_4XXX_PMISC_BAR; -} - -static u32 get_etr_bar_id(struct adf_hw_device_data *self) -{ - return ADF_4XXX_ETR_BAR; -} - -static u32 get_sram_bar_id(struct adf_hw_device_data *self) -{ - return ADF_4XXX_SRAM_BAR; -} - -/* - * The vector routing table is used to select the MSI-X entry to use for each - * interrupt source. - * The first ADF_4XXX_ETR_MAX_BANKS entries correspond to ring interrupts. - * The final entry corresponds to VF2PF or error interrupts. - * This vector table could be used to configure one MSI-X entry to be shared - * between multiple interrupt sources. - * - * The default routing is set to have a one to one correspondence between the - * interrupt source and the MSI-X entry used. - */ -static void set_msix_default_rttable(struct adf_accel_dev *accel_dev) -{ - void __iomem *csr; - int i; - - csr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr; - for (i = 0; i <= ADF_4XXX_ETR_MAX_BANKS; i++) - ADF_CSR_WR(csr, ADF_4XXX_MSIX_RTTABLE_OFFSET(i), i); -} - static u32 get_accel_cap(struct adf_accel_dev *accel_dev) { struct pci_dev *pdev = accel_dev->accel_pci_dev.pci_dev; @@ -189,7 +112,7 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) u32 fusectl1; /* Read accelerator capabilities mask */ - pci_read_config_dword(pdev, ADF_4XXX_FUSECTL1_OFFSET, &fusectl1); + pci_read_config_dword(pdev, ADF_GEN4_FUSECTL1_OFFSET, &fusectl1); capabilities_sym = ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC | ICP_ACCEL_CAPABILITIES_CIPHER | @@ -204,27 +127,27 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) ICP_ACCEL_CAPABILITIES_AES_V2; /* A set bit in fusectl1 means the feature is OFF in this SKU */ - if (fusectl1 & ICP_ACCEL_4XXX_MASK_CIPHER_SLICE) { + if (fusectl1 & ICP_ACCEL_GEN4_MASK_CIPHER_SLICE) { capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_SYMMETRIC; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_HKDF; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER; } - if (fusectl1 & ICP_ACCEL_4XXX_MASK_UCS_SLICE) { + if (fusectl1 & ICP_ACCEL_GEN4_MASK_UCS_SLICE) { capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CHACHA_POLY; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AESGCM_SPC; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AES_V2; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER; } - if (fusectl1 & ICP_ACCEL_4XXX_MASK_AUTH_SLICE) { + if (fusectl1 & ICP_ACCEL_GEN4_MASK_AUTH_SLICE) { capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_AUTHENTICATION; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SHA3; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SHA3_EXT; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_CIPHER; } - if (fusectl1 & ICP_ACCEL_4XXX_MASK_SMX_SLICE) { + if (fusectl1 & ICP_ACCEL_GEN4_MASK_SMX_SLICE) { capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SM3; capabilities_sym &= ~ICP_ACCEL_CAPABILITIES_SM4; } @@ -234,7 +157,7 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) ICP_ACCEL_CAPABILITIES_SM2 | ICP_ACCEL_CAPABILITIES_ECEDMONT; - if (fusectl1 & 
ICP_ACCEL_4XXX_MASK_PKE_SLICE) { + if (fusectl1 & ICP_ACCEL_GEN4_MASK_PKE_SLICE) { capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_CRYPTO_ASYMMETRIC; capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_SM2; capabilities_asym &= ~ICP_ACCEL_CAPABILITIES_ECEDMONT; @@ -245,7 +168,7 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) ICP_ACCEL_CAPABILITIES_LZ4S_COMPRESSION | ICP_ACCEL_CAPABILITIES_CNV_INTEGRITY64; - if (fusectl1 & ICP_ACCEL_4XXX_MASK_COMPRESS_SLICE) { + if (fusectl1 & ICP_ACCEL_GEN4_MASK_COMPRESS_SLICE) { capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_COMPRESSION; capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_LZ4_COMPRESSION; capabilities_dc &= ~ICP_ACCEL_CAPABILITIES_LZ4S_COMPRESSION; @@ -281,43 +204,13 @@ static u32 get_accel_cap(struct adf_accel_dev *accel_dev) } } -static enum dev_sku_info get_sku(struct adf_hw_device_data *self) -{ - return DEV_SKU_1; -} - static const u32 *adf_get_arbiter_mapping(struct adf_accel_dev *accel_dev) { - switch (adf_get_service_enabled(accel_dev)) { - case SVC_DC: - return thrd_to_arb_map_dc; - case SVC_DCC: - return thrd_to_arb_map_dcc; - default: - return default_thrd_to_arb_map; - } -} + if (adf_gen4_init_thd2arb_map(accel_dev)) + dev_warn(&GET_DEV(accel_dev), + "Generate of the thread to arbiter map failed"); -static void get_arb_info(struct arb_info *arb_info) -{ - arb_info->arb_cfg = ADF_4XXX_ARB_CONFIG; - arb_info->arb_offset = ADF_4XXX_ARB_OFFSET; - arb_info->wt2sam_offset = ADF_4XXX_ARB_WRK_2_SER_MAP_OFFSET; -} - -static void get_admin_info(struct admin_info *admin_csrs_info) -{ - admin_csrs_info->mailbox_offset = ADF_4XXX_MAILBOX_BASE_OFFSET; - admin_csrs_info->admin_msg_ur = ADF_4XXX_ADMINMSGUR_OFFSET; - admin_csrs_info->admin_msg_lr = ADF_4XXX_ADMINMSGLR_OFFSET; -} - -static u32 get_heartbeat_clock(struct adf_hw_device_data *self) -{ - /* - * 4XXX uses KPT counter for HB - */ - return ADF_4XXX_KPT_COUNTER_FREQ; + return GET_HW_DATA(accel_dev)->thd_to_arb_map; } static void adf_init_rl_data(struct adf_rl_hw_data *rl_data) @@ -338,59 +231,7 @@ static void adf_init_rl_data(struct adf_rl_hw_data *rl_data) rl_data->scale_ref = ADF_4XXX_RL_SLICE_REF; } -static void adf_enable_error_correction(struct adf_accel_dev *accel_dev) -{ - struct adf_bar *misc_bar = &GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR]; - void __iomem *csr = misc_bar->virt_addr; - - /* Enable all in errsou3 except VFLR notification on host */ - ADF_CSR_WR(csr, ADF_GEN4_ERRMSK3, ADF_GEN4_VFLNOTIFY); -} - -static void adf_enable_ints(struct adf_accel_dev *accel_dev) -{ - void __iomem *addr; - - addr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr; - - /* Enable bundle interrupts */ - ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_RP_X0_MASK_OFFSET, 0); - ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_RP_X1_MASK_OFFSET, 0); - - /* Enable misc interrupts */ - ADF_CSR_WR(addr, ADF_4XXX_SMIAPF_MASK_OFFSET, 0); -} - -static int adf_init_device(struct adf_accel_dev *accel_dev) -{ - void __iomem *addr; - u32 status; - u32 csr; - int ret; - - addr = (&GET_BARS(accel_dev)[ADF_4XXX_PMISC_BAR])->virt_addr; - - /* Temporarily mask PM interrupt */ - csr = ADF_CSR_RD(addr, ADF_GEN4_ERRMSK2); - csr |= ADF_GEN4_PM_SOU; - ADF_CSR_WR(addr, ADF_GEN4_ERRMSK2, csr); - - /* Set DRV_ACTIVE bit to power up the device */ - ADF_CSR_WR(addr, ADF_GEN4_PM_INTERRUPT, ADF_GEN4_PM_DRV_ACTIVE); - - /* Poll status register to make sure the device is powered up */ - ret = read_poll_timeout(ADF_CSR_RD, status, - status & ADF_GEN4_PM_INIT_STATE, - ADF_GEN4_PM_POLL_DELAY_US, - ADF_GEN4_PM_POLL_TIMEOUT_US, true, addr, - ADF_GEN4_PM_STATUS); - 
if (ret) - dev_err(&GET_DEV(accel_dev), "Failed to power up the device\n"); - - return ret; -} - -static u32 uof_get_num_objs(void) +static u32 uof_get_num_objs(struct adf_accel_dev *accel_dev) { return ARRAY_SIZE(adf_fw_cy_config); } @@ -420,11 +261,64 @@ static const struct adf_fw_config *get_fw_config(struct adf_accel_dev *accel_dev } } -enum adf_rp_groups { - RP_GROUP_0 = 0, - RP_GROUP_1, - RP_GROUP_COUNT -}; +static int get_rp_group(struct adf_accel_dev *accel_dev, u32 ae_mask) +{ + switch (ae_mask) { + case ADF_AE_GROUP_0: + return RP_GROUP_0; + case ADF_AE_GROUP_1: + return RP_GROUP_1; + default: + dev_dbg(&GET_DEV(accel_dev), "ae_mask not recognized"); + return -EINVAL; + } +} + +static u32 get_ena_thd_mask(struct adf_accel_dev *accel_dev, u32 obj_num) +{ + const struct adf_fw_config *fw_config; + + if (obj_num >= uof_get_num_objs(accel_dev)) + return ADF_GEN4_ENA_THD_MASK_ERROR; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return ADF_GEN4_ENA_THD_MASK_ERROR; + + switch (fw_config[obj_num].obj) { + case ADF_FW_ASYM_OBJ: + return ENA_THD_MASK_ASYM; + case ADF_FW_SYM_OBJ: + return ENA_THD_MASK_SYM; + case ADF_FW_DC_OBJ: + return ENA_THD_MASK_DC; + default: + return ADF_GEN4_ENA_THD_MASK_ERROR; + } +} + +static u32 get_ena_thd_mask_401xx(struct adf_accel_dev *accel_dev, u32 obj_num) +{ + const struct adf_fw_config *fw_config; + + if (obj_num >= uof_get_num_objs(accel_dev)) + return ADF_GEN4_ENA_THD_MASK_ERROR; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return ADF_GEN4_ENA_THD_MASK_ERROR; + + switch (fw_config[obj_num].obj) { + case ADF_FW_ASYM_OBJ: + return ENA_THD_MASK_ASYM_401XX; + case ADF_FW_SYM_OBJ: + return ENA_THD_MASK_SYM; + case ADF_FW_DC_OBJ: + return ENA_THD_MASK_DC; + default: + return ADF_GEN4_ENA_THD_MASK_ERROR; + } +} static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev) { @@ -538,54 +432,64 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) { hw_data->dev_class = &adf_4xxx_class; hw_data->instance_id = adf_4xxx_class.instances++; - hw_data->num_banks = ADF_4XXX_ETR_MAX_BANKS; - hw_data->num_banks_per_vf = ADF_4XXX_NUM_BANKS_PER_VF; - hw_data->num_rings_per_bank = ADF_4XXX_NUM_RINGS_PER_BANK; - hw_data->num_accel = ADF_4XXX_MAX_ACCELERATORS; + hw_data->num_banks = ADF_GEN4_ETR_MAX_BANKS; + hw_data->num_banks_per_vf = ADF_GEN4_NUM_BANKS_PER_VF; + hw_data->num_rings_per_bank = ADF_GEN4_NUM_RINGS_PER_BANK; + hw_data->num_accel = ADF_GEN4_MAX_ACCELERATORS; hw_data->num_engines = ADF_4XXX_MAX_ACCELENGINES; hw_data->num_logical_accel = 1; - hw_data->tx_rx_gap = ADF_4XXX_RX_RINGS_OFFSET; - hw_data->tx_rings_mask = ADF_4XXX_TX_RINGS_MASK; + hw_data->tx_rx_gap = ADF_GEN4_RX_RINGS_OFFSET; + hw_data->tx_rings_mask = ADF_GEN4_TX_RINGS_MASK; hw_data->ring_to_svc_map = ADF_GEN4_DEFAULT_RING_TO_SRV_MAP; hw_data->alloc_irq = adf_isr_resource_alloc; hw_data->free_irq = adf_isr_resource_free; - hw_data->enable_error_correction = adf_enable_error_correction; - hw_data->get_accel_mask = get_accel_mask; + hw_data->enable_error_correction = adf_gen4_enable_error_correction; + hw_data->get_accel_mask = adf_gen4_get_accel_mask; hw_data->get_ae_mask = get_ae_mask; - hw_data->get_num_accels = get_num_accels; - hw_data->get_num_aes = get_num_aes; - hw_data->get_sram_bar_id = get_sram_bar_id; - hw_data->get_etr_bar_id = get_etr_bar_id; - hw_data->get_misc_bar_id = get_misc_bar_id; - hw_data->get_arb_info = get_arb_info; - hw_data->get_admin_info = get_admin_info; + hw_data->get_num_accels = adf_gen4_get_num_accels; + 
hw_data->get_num_aes = adf_gen4_get_num_aes; + hw_data->get_sram_bar_id = adf_gen4_get_sram_bar_id; + hw_data->get_etr_bar_id = adf_gen4_get_etr_bar_id; + hw_data->get_misc_bar_id = adf_gen4_get_misc_bar_id; + hw_data->get_arb_info = adf_gen4_get_arb_info; + hw_data->get_admin_info = adf_gen4_get_admin_info; hw_data->get_accel_cap = get_accel_cap; - hw_data->get_sku = get_sku; + hw_data->get_sku = adf_gen4_get_sku; hw_data->init_admin_comms = adf_init_admin_comms; hw_data->exit_admin_comms = adf_exit_admin_comms; hw_data->send_admin_init = adf_send_admin_init; hw_data->init_arb = adf_init_arb; hw_data->exit_arb = adf_exit_arb; hw_data->get_arb_mapping = adf_get_arbiter_mapping; - hw_data->enable_ints = adf_enable_ints; - hw_data->init_device = adf_init_device; + hw_data->enable_ints = adf_gen4_enable_ints; + hw_data->init_device = adf_gen4_init_device; hw_data->reset_device = adf_reset_flr; hw_data->admin_ae_mask = ADF_4XXX_ADMIN_AE_MASK; + hw_data->num_rps = ADF_GEN4_MAX_RPS; switch (dev_id) { case ADF_402XX_PCI_DEVICE_ID: hw_data->fw_name = ADF_402XX_FW; hw_data->fw_mmp_name = ADF_402XX_MMP; hw_data->uof_get_name = uof_get_name_402xx; + hw_data->get_ena_thd_mask = get_ena_thd_mask; + break; + case ADF_401XX_PCI_DEVICE_ID: + hw_data->fw_name = ADF_4XXX_FW; + hw_data->fw_mmp_name = ADF_4XXX_MMP; + hw_data->uof_get_name = uof_get_name_4xxx; + hw_data->get_ena_thd_mask = get_ena_thd_mask_401xx; break; - default: hw_data->fw_name = ADF_4XXX_FW; hw_data->fw_mmp_name = ADF_4XXX_MMP; hw_data->uof_get_name = uof_get_name_4xxx; + hw_data->get_ena_thd_mask = get_ena_thd_mask; + break; } hw_data->uof_get_num_objs = uof_get_num_objs; hw_data->uof_get_ae_mask = uof_get_ae_mask; - hw_data->set_msix_rttable = set_msix_default_rttable; + hw_data->get_rp_group = get_rp_group; + hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; hw_data->get_ring_to_svc_map = get_ring_to_svc_map; hw_data->disable_iov = adf_disable_sriov; @@ -595,7 +499,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->dev_config = adf_gen4_dev_config; hw_data->start_timer = adf_gen4_timer_start; hw_data->stop_timer = adf_gen4_timer_stop; - hw_data->get_hb_clock = get_heartbeat_clock; + hw_data->get_hb_clock = adf_gen4_get_heartbeat_clock; hw_data->num_hb_ctrs = ADF_NUM_HB_CNT_PER_AE; hw_data->clock_frequency = ADF_4XXX_AE_FREQ; @@ -604,6 +508,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen4_init_dc_ops(&hw_data->dc_ops); adf_gen4_init_ras_ops(&hw_data->ras_ops); + adf_gen4_init_tl_data(&hw_data->tl_data); adf_init_rl_data(&hw_data->rl_data); } diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.h index 33423295e9..76388363ea 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.h +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.h @@ -6,25 +6,8 @@ #include <linux/units.h> #include <adf_accel_devices.h> -/* PCIe configuration space */ -#define ADF_4XXX_SRAM_BAR 0 -#define ADF_4XXX_PMISC_BAR 1 -#define ADF_4XXX_ETR_BAR 2 -#define ADF_4XXX_RX_RINGS_OFFSET 1 -#define ADF_4XXX_TX_RINGS_MASK 0x1 -#define ADF_4XXX_MAX_ACCELERATORS 1 #define ADF_4XXX_MAX_ACCELENGINES 9 -#define ADF_4XXX_BAR_MASK (BIT(0) | BIT(2) | BIT(4)) -/* Physical function fuses */ -#define ADF_4XXX_FUSECTL0_OFFSET (0x2C8) -#define ADF_4XXX_FUSECTL1_OFFSET (0x2CC) -#define ADF_4XXX_FUSECTL2_OFFSET (0x2D0) 
-#define ADF_4XXX_FUSECTL3_OFFSET (0x2D4) -#define ADF_4XXX_FUSECTL4_OFFSET (0x2D8) -#define ADF_4XXX_FUSECTL5_OFFSET (0x2DC) - -#define ADF_4XXX_ACCELERATORS_MASK (0x1) #define ADF_4XXX_ACCELENGINES_MASK (0x1FF) #define ADF_4XXX_ADMIN_AE_MASK (0x100) @@ -45,28 +28,6 @@ (BIT(4) | BIT(12) | BIT(16) | BIT(17) | BIT(18) | \ BIT(19) | BIT(20) | BIT(21) | BIT(22) | BIT(23)) -#define ADF_4XXX_ETR_MAX_BANKS 64 - -/* MSIX interrupt */ -#define ADF_4XXX_SMIAPF_RP_X0_MASK_OFFSET (0x41A040) -#define ADF_4XXX_SMIAPF_RP_X1_MASK_OFFSET (0x41A044) -#define ADF_4XXX_SMIAPF_MASK_OFFSET (0x41A084) -#define ADF_4XXX_MSIX_RTTABLE_OFFSET(i) (0x409000 + ((i) * 0x04)) - -/* Bank and ring configuration */ -#define ADF_4XXX_NUM_RINGS_PER_BANK 2 -#define ADF_4XXX_NUM_BANKS_PER_VF 4 - -/* Arbiter configuration */ -#define ADF_4XXX_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0)) -#define ADF_4XXX_ARB_OFFSET (0x0) -#define ADF_4XXX_ARB_WRK_2_SER_MAP_OFFSET (0x400) - -/* Admin Interface Reg Offset */ -#define ADF_4XXX_ADMINMSGUR_OFFSET (0x500574) -#define ADF_4XXX_ADMINMSGLR_OFFSET (0x500578) -#define ADF_4XXX_MAILBOX_BASE_OFFSET (0x600970) - /* Firmware Binaries */ #define ADF_4XXX_FW "qat_4xxx.bin" #define ADF_4XXX_MMP "qat_4xxx_mmp.bin" @@ -93,22 +54,9 @@ #define ADF_4XXX_RL_SLICE_REF 1000UL /* Clocks frequency */ -#define ADF_4XXX_KPT_COUNTER_FREQ (100 * HZ_PER_MHZ) #define ADF_4XXX_AE_FREQ (1000 * HZ_PER_MHZ) -/* qat_4xxx fuse bits are different from old GENs, redefine them */ -enum icp_qat_4xxx_slice_mask { - ICP_ACCEL_4XXX_MASK_CIPHER_SLICE = BIT(0), - ICP_ACCEL_4XXX_MASK_AUTH_SLICE = BIT(1), - ICP_ACCEL_4XXX_MASK_PKE_SLICE = BIT(2), - ICP_ACCEL_4XXX_MASK_COMPRESS_SLICE = BIT(3), - ICP_ACCEL_4XXX_MASK_UCS_SLICE = BIT(4), - ICP_ACCEL_4XXX_MASK_EIA3_SLICE = BIT(5), - ICP_ACCEL_4XXX_MASK_SMX_SLICE = BIT(7), -}; - void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id); void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data); -int adf_gen4_dev_config(struct adf_accel_dev *accel_dev); #endif diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c index 8f483d1197..9762f2bf77 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_drv.c @@ -8,13 +8,10 @@ #include <adf_cfg.h> #include <adf_common_drv.h> #include <adf_dbgfs.h> -#include <adf_heartbeat.h> +#include <adf_gen4_config.h> +#include <adf_gen4_hw_data.h> #include "adf_4xxx_hw_data.h" -#include "adf_cfg_services.h" -#include "qat_compression.h" -#include "qat_crypto.h" -#include "adf_transport_access_macros.h" static const struct pci_device_id adf_pci_tbl[] = { { PCI_VDEVICE(INTEL, ADF_4XXX_PCI_DEVICE_ID), }, @@ -35,270 +32,6 @@ static void adf_cleanup_accel(struct adf_accel_dev *accel_dev) adf_devmgr_rm_dev(accel_dev, NULL); } -static int adf_cfg_dev_init(struct adf_accel_dev *accel_dev) -{ - const char *config; - int ret; - - config = accel_dev->accel_id % 2 ? 
ADF_CFG_DC : ADF_CFG_CY; - - ret = adf_cfg_section_add(accel_dev, ADF_GENERAL_SEC); - if (ret) - return ret; - - /* Default configuration is crypto only for even devices - * and compression for odd devices - */ - ret = adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC, - ADF_SERVICES_ENABLED, config, - ADF_STR); - if (ret) - return ret; - - adf_heartbeat_save_cfg_param(accel_dev, ADF_CFG_HB_TIMER_MIN_MS); - - return 0; -} - -static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) -{ - char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; - int banks = GET_MAX_BANKS(accel_dev); - int cpus = num_online_cpus(); - unsigned long bank, val; - int instances; - int ret; - int i; - - if (adf_hw_dev_has_crypto(accel_dev)) - instances = min(cpus, banks / 2); - else - instances = 0; - - for (i = 0; i < instances; i++) { - val = i; - bank = i * 2; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &bank, ADF_DEC); - if (ret) - goto err; - - bank += 1; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &bank, ADF_DEC); - if (ret) - goto err; - - snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY, - i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); - val = 128; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 512; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 0; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 0; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 1; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 1; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = ADF_COALESCING_DEF_TIME; - snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); - ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", - key, &val, ADF_DEC); - if (ret) - goto err; - } - - val = i; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, - &val, ADF_DEC); - if (ret) - goto err; - - val = 0; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, - &val, ADF_DEC); - if (ret) - goto err; - - return 0; -err: - dev_err(&GET_DEV(accel_dev), "Failed to add configuration for crypto\n"); - return ret; -} - -static int adf_comp_dev_config(struct adf_accel_dev *accel_dev) -{ - char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; - int banks = GET_MAX_BANKS(accel_dev); - int cpus = num_online_cpus(); - unsigned long val; - int instances; - int ret; - int i; - - if (adf_hw_dev_has_compression(accel_dev)) - instances = min(cpus, banks); - else - instances = 0; - - for (i = 0; i < instances; i++) { - val = i; - snprintf(key, sizeof(key), ADF_DC "%d" 
ADF_RING_DC_BANK_NUM, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 512; - snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 0; - snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 1; - snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = ADF_COALESCING_DEF_TIME; - snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); - ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", - key, &val, ADF_DEC); - if (ret) - goto err; - } - - val = i; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, - &val, ADF_DEC); - if (ret) - goto err; - - val = 0; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, - &val, ADF_DEC); - if (ret) - goto err; - - return 0; -err: - dev_err(&GET_DEV(accel_dev), "Failed to add configuration for compression\n"); - return ret; -} - -static int adf_no_dev_config(struct adf_accel_dev *accel_dev) -{ - unsigned long val; - int ret; - - val = 0; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, - &val, ADF_DEC); - if (ret) - return ret; - - return adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, - &val, ADF_DEC); -} - -int adf_gen4_dev_config(struct adf_accel_dev *accel_dev) -{ - char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; - int ret; - - ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); - if (ret) - goto err; - - ret = adf_cfg_section_add(accel_dev, "Accelerator0"); - if (ret) - goto err; - - ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, - ADF_SERVICES_ENABLED, services); - if (ret) - goto err; - - ret = sysfs_match_string(adf_cfg_services, services); - if (ret < 0) - goto err; - - switch (ret) { - case SVC_CY: - case SVC_CY2: - ret = adf_crypto_dev_config(accel_dev); - break; - case SVC_DC: - case SVC_DCC: - ret = adf_comp_dev_config(accel_dev); - break; - default: - ret = adf_no_dev_config(accel_dev); - break; - } - - if (ret) - goto err; - - set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); - - return ret; - -err: - dev_err(&GET_DEV(accel_dev), "Failed to configure QAT driver\n"); - return ret; -} - static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { struct adf_accel_dev *accel_dev; @@ -348,7 +81,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) adf_init_hw_data_4xxx(accel_dev->hw_device, ent->device); pci_read_config_byte(pdev, PCI_REVISION_ID, &accel_pci_dev->revid); - pci_read_config_dword(pdev, ADF_4XXX_FUSECTL4_OFFSET, &hw_data->fuses); + pci_read_config_dword(pdev, ADF_GEN4_FUSECTL4_OFFSET, &hw_data->fuses); /* Get Accelerators and Accelerators Engines masks */ hw_data->accel_mask = hw_data->get_accel_mask(hw_data); @@ -381,7 +114,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err; } - ret = adf_cfg_dev_init(accel_dev); + ret = adf_gen4_cfg_dev_init(accel_dev); if (ret) { dev_err(&pdev->dev, "Failed to initialize configuration.\n"); goto out_err; @@ -396,7 +129,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } /* Find and map all the device's BARS */ - 
bar_mask = pci_select_bars(pdev, IORESOURCE_MEM) & ADF_4XXX_BAR_MASK; + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM) & ADF_GEN4_BAR_MASK; ret = pcim_iomap_regions_request_all(pdev, bar_mask, pci_name(pdev)); if (ret) { diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 779a8aa0b8..6908727bff 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -16,6 +16,7 @@ intel_qat-objs := adf_cfg.o \ adf_sysfs_ras_counters.o \ adf_gen2_hw_data.o \ adf_gen2_config.o \ + adf_gen4_config.o \ adf_gen4_hw_data.o \ adf_gen4_pm.o \ adf_gen2_dc.o \ @@ -40,9 +41,12 @@ intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \ adf_fw_counters.o \ adf_cnv_dbgfs.o \ adf_gen4_pm_debugfs.o \ + adf_gen4_tl.o \ adf_heartbeat.o \ adf_heartbeat_dbgfs.o \ adf_pm_dbgfs.o \ + adf_telemetry.o \ + adf_tl_debugfs.o \ adf_dbgfs.o intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index 9d5fdd529a..a16c7e6edc 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -6,11 +6,14 @@ #include <linux/module.h> #include <linux/list.h> #include <linux/io.h> +#include <linux/pci.h> #include <linux/ratelimit.h> #include <linux/types.h> #include "adf_cfg_common.h" #include "adf_rl.h" +#include "adf_telemetry.h" #include "adf_pfvf_msg.h" +#include "icp_qat_hw.h" #define ADF_DH895XCC_DEVICE_NAME "dh895xcc" #define ADF_DH895XCCVF_DEVICE_NAME "dh895xccvf" @@ -19,12 +22,15 @@ #define ADF_C3XXX_DEVICE_NAME "c3xxx" #define ADF_C3XXXVF_DEVICE_NAME "c3xxxvf" #define ADF_4XXX_DEVICE_NAME "4xxx" +#define ADF_420XX_DEVICE_NAME "420xx" #define ADF_4XXX_PCI_DEVICE_ID 0x4940 #define ADF_4XXXIOV_PCI_DEVICE_ID 0x4941 #define ADF_401XX_PCI_DEVICE_ID 0x4942 #define ADF_401XXIOV_PCI_DEVICE_ID 0x4943 #define ADF_402XX_PCI_DEVICE_ID 0x4944 #define ADF_402XXIOV_PCI_DEVICE_ID 0x4945 +#define ADF_420XX_PCI_DEVICE_ID 0x4946 +#define ADF_420XXIOV_PCI_DEVICE_ID 0x4947 #define ADF_DEVICE_FUSECTL_OFFSET 0x40 #define ADF_DEVICE_LEGFUSE_OFFSET 0x4C #define ADF_DEVICE_FUSECTL_MASK 0x80000000 @@ -241,8 +247,10 @@ struct adf_hw_device_data { void (*reset_device)(struct adf_accel_dev *accel_dev); void (*set_msix_rttable)(struct adf_accel_dev *accel_dev); const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num); - u32 (*uof_get_num_objs)(void); + u32 (*uof_get_num_objs)(struct adf_accel_dev *accel_dev); u32 (*uof_get_ae_mask)(struct adf_accel_dev *accel_dev, u32 obj_num); + int (*get_rp_group)(struct adf_accel_dev *accel_dev, u32 ae_mask); + u32 (*get_ena_thd_mask)(struct adf_accel_dev *accel_dev, u32 obj_num); int (*dev_config)(struct adf_accel_dev *accel_dev); struct adf_pfvf_ops pfvf_ops; struct adf_hw_csr_ops csr_ops; @@ -250,6 +258,7 @@ struct adf_hw_device_data { struct adf_ras_ops ras_ops; struct adf_dev_err_mask dev_err_mask; struct adf_rl_hw_data rl_data; + struct adf_tl_hw_data tl_data; const char *fw_name; const char *fw_mmp_name; u32 fuses; @@ -264,6 +273,7 @@ struct adf_hw_device_data { u32 admin_ae_mask; u16 tx_rings_mask; u16 ring_to_svc_map; + u32 thd_to_arb_map[ICP_QAT_HW_AE_DELIMITER]; u8 tx_rx_gap; u8 num_banks; u16 num_banks_per_vf; @@ -272,6 +282,7 @@ struct adf_hw_device_data { u8 num_logical_accel; u8 num_engines; u32 num_hb_ctrs; + u8 num_rps; }; /* CSR write macro */ @@ -304,6 +315,7 @@ 
struct adf_hw_device_data { #define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops) #define GET_PFVF_OPS(accel_dev) (&(accel_dev)->hw_device->pfvf_ops) #define GET_DC_OPS(accel_dev) (&(accel_dev)->hw_device->dc_ops) +#define GET_TL_DATA(accel_dev) GET_HW_DATA(accel_dev)->tl_data #define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev struct adf_admin_comms; @@ -352,6 +364,7 @@ struct adf_accel_dev { struct adf_cfg_device_data *cfg; struct adf_fw_loader_data *fw_loader; struct adf_admin_comms *admin; + struct adf_telemetry *telemetry; struct adf_dc_data *dc_data; struct adf_pm power_management; struct list_head crypto_list; diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_engine.c b/drivers/crypto/intel/qat/qat_common/adf_accel_engine.c index 6be064dc64..4b5d0350fc 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_accel_engine.c +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_engine.c @@ -19,7 +19,7 @@ static int adf_ae_fw_load_images(struct adf_accel_dev *accel_dev, void *fw_addr, int i; loader = loader_data->fw_loader; - num_objs = hw_device->uof_get_num_objs(); + num_objs = hw_device->uof_get_num_objs(accel_dev); for (i = 0; i < num_objs; i++) { obj_name = hw_device->uof_get_name(accel_dev, i); diff --git a/drivers/crypto/intel/qat/qat_common/adf_admin.c b/drivers/crypto/intel/qat/qat_common/adf_admin.c index 54b673ec23..acad526eb7 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_admin.c +++ b/drivers/crypto/intel/qat/qat_common/adf_admin.c @@ -498,6 +498,43 @@ int adf_get_cnv_stats(struct adf_accel_dev *accel_dev, u16 ae, u16 *err_cnt, return ret; } +int adf_send_admin_tl_start(struct adf_accel_dev *accel_dev, + dma_addr_t tl_dma_addr, size_t layout_sz, u8 *rp_indexes, + struct icp_qat_fw_init_admin_slice_cnt *slice_count) +{ + u32 ae_mask = GET_HW_DATA(accel_dev)->admin_ae_mask; + struct icp_qat_fw_init_admin_resp resp = { }; + struct icp_qat_fw_init_admin_req req = { }; + int ret; + + req.cmd_id = ICP_QAT_FW_TL_START; + req.init_cfg_ptr = tl_dma_addr; + req.init_cfg_sz = layout_sz; + + if (rp_indexes) + memcpy(&req.rp_indexes, rp_indexes, sizeof(req.rp_indexes)); + + ret = adf_send_admin(accel_dev, &req, &resp, ae_mask); + if (ret) + return ret; + + memcpy(slice_count, &resp.slices, sizeof(*slice_count)); + + return 0; +} + +int adf_send_admin_tl_stop(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct icp_qat_fw_init_admin_resp resp = { }; + struct icp_qat_fw_init_admin_req req = { }; + u32 ae_mask = hw_data->admin_ae_mask; + + req.cmd_id = ICP_QAT_FW_TL_STOP; + + return adf_send_admin(accel_dev, &req, &resp, ae_mask); +} + int adf_init_admin_comms(struct adf_accel_dev *accel_dev) { struct adf_admin_comms *admin; diff --git a/drivers/crypto/intel/qat/qat_common/adf_admin.h b/drivers/crypto/intel/qat/qat_common/adf_admin.h index 55cbcbc66c..647c8e1967 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_admin.h +++ b/drivers/crypto/intel/qat/qat_common/adf_admin.h @@ -23,5 +23,9 @@ int adf_send_admin_rl_delete(struct adf_accel_dev *accel_dev, u16 node_id, int adf_get_fw_timestamp(struct adf_accel_dev *accel_dev, u64 *timestamp); int adf_get_pm_info(struct adf_accel_dev *accel_dev, dma_addr_t p_state_addr, size_t buff_size); int adf_get_cnv_stats(struct adf_accel_dev *accel_dev, u16 ae, u16 *err_cnt, u16 *latest_err); +int adf_send_admin_tl_start(struct adf_accel_dev *accel_dev, + dma_addr_t tl_dma_addr, size_t layout_sz, u8 *rp_indexes, + struct icp_qat_fw_init_admin_slice_cnt 
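The two admin commands above follow the existing fire-and-wait request/response pattern: TL_START hands the firmware the DMA address and size of the layout buffer plus the ring-pair indexes to monitor, and the response carries the device's slice counts back. A hedged usage sketch, mirroring what adf_tl_run() does later in this series (the 0xff sentinel for an unmonitored slot is an assumption; the real driver uses ADF_TL_RP_REGS_DISABLED, whose value is not shown here):

        struct icp_qat_fw_init_admin_slice_cnt slices;
        u8 rps[4] = { 0, 1, 0xff, 0xff };
        int ret;

        ret = adf_send_admin_tl_start(accel_dev, dma_addr, layout_sz,
                                      rps, &slices);
        if (ret)
                return ret;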
*slice_count); +int adf_send_admin_tl_stop(struct adf_accel_dev *accel_dev); #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h index 6e5de1dab9..89df3888d7 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_common.h @@ -47,6 +47,7 @@ enum adf_device_type { DEV_C3XXX, DEV_C3XXXVF, DEV_4XXX, + DEV_420XX, }; struct adf_dev_status_info { diff --git a/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c b/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c index 477efcc81a..c42f5c25aa 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_dbgfs.c @@ -10,6 +10,7 @@ #include "adf_fw_counters.h" #include "adf_heartbeat_dbgfs.h" #include "adf_pm_dbgfs.h" +#include "adf_tl_debugfs.h" /** * adf_dbgfs_init() - add persistent debugfs entries @@ -66,6 +67,7 @@ void adf_dbgfs_add(struct adf_accel_dev *accel_dev) adf_heartbeat_dbgfs_add(accel_dev); adf_pm_dbgfs_add(accel_dev); adf_cnv_dbgfs_add(accel_dev); + adf_tl_dbgfs_add(accel_dev); } } @@ -79,6 +81,7 @@ void adf_dbgfs_rm(struct adf_accel_dev *accel_dev) return; if (!accel_dev->is_vf) { + adf_tl_dbgfs_rm(accel_dev); adf_cnv_dbgfs_rm(accel_dev); adf_pm_dbgfs_rm(accel_dev); adf_heartbeat_dbgfs_rm(accel_dev); diff --git a/drivers/crypto/intel/qat/qat_common/adf_fw_config.h b/drivers/crypto/intel/qat/qat_common/adf_fw_config.h new file mode 100644 index 0000000000..4f86696800 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_fw_config.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation */ +#ifndef ADF_FW_CONFIG_H_ +#define ADF_FW_CONFIG_H_ + +enum adf_fw_objs { + ADF_FW_SYM_OBJ, + ADF_FW_ASYM_OBJ, + ADF_FW_DC_OBJ, + ADF_FW_ADMIN_OBJ, +}; + +struct adf_fw_config { + u32 ae_mask; + enum adf_fw_objs obj; +}; + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_config.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_config.c new file mode 100644 index 0000000000..fe1f3d727d --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_config.c @@ -0,0 +1,287 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation */ +#include "adf_accel_devices.h" +#include "adf_cfg.h" +#include "adf_cfg_services.h" +#include "adf_cfg_strings.h" +#include "adf_common_drv.h" +#include "adf_gen4_config.h" +#include "adf_heartbeat.h" +#include "adf_transport_access_macros.h" +#include "qat_compression.h" +#include "qat_crypto.h" + +static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long bank, val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_crypto(accel_dev)) + instances = min(cpus, banks / 2); + else + instances = 0; + + for (i = 0; i < instances; i++) { + val = i; + bank = i * 2; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &bank, ADF_DEC); + if (ret) + goto err; + + bank += 1; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &bank, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY, + i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + 
snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); + val = 128; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 1; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 1; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = ADF_COALESCING_DEF_TIME; + snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); + ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + goto err; + + return 0; +err: + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for crypto\n"); + return ret; +} + +static int adf_comp_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_compression(accel_dev)) + instances = min(cpus, banks); + else + instances = 0; + + for (i = 0; i < instances; i++) { + val = i; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 1; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = ADF_COALESCING_DEF_TIME; + snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); + ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); + if (ret) + goto err; + + return 0; +err: + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for compression\n"); + return ret; +} + +static int adf_no_dev_config(struct adf_accel_dev *accel_dev) +{ + unsigned long val; + int ret; + + val = 0; + ret = 
adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + return ret; + + return adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); +} + +/** + * adf_gen4_dev_config() - create dev config required to create instances + * + * @accel_dev: Pointer to acceleration device. + * + * Function creates device configuration required to create instances + * + * Return: 0 on success, error code otherwise. + */ +int adf_gen4_dev_config(struct adf_accel_dev *accel_dev) +{ + char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; + int ret; + + ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); + if (ret) + goto err; + + ret = adf_cfg_section_add(accel_dev, "Accelerator0"); + if (ret) + goto err; + + ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, + ADF_SERVICES_ENABLED, services); + if (ret) + goto err; + + ret = sysfs_match_string(adf_cfg_services, services); + if (ret < 0) + goto err; + + switch (ret) { + case SVC_CY: + case SVC_CY2: + ret = adf_crypto_dev_config(accel_dev); + break; + case SVC_DC: + case SVC_DCC: + ret = adf_comp_dev_config(accel_dev); + break; + default: + ret = adf_no_dev_config(accel_dev); + break; + } + + if (ret) + goto err; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + + return ret; + +err: + dev_err(&GET_DEV(accel_dev), "Failed to configure QAT driver\n"); + return ret; +} +EXPORT_SYMBOL_GPL(adf_gen4_dev_config); + +int adf_gen4_cfg_dev_init(struct adf_accel_dev *accel_dev) +{ + const char *config; + int ret; + + config = accel_dev->accel_id % 2 ? ADF_CFG_DC : ADF_CFG_CY; + + ret = adf_cfg_section_add(accel_dev, ADF_GENERAL_SEC); + if (ret) + return ret; + + /* Default configuration is crypto only for even devices + * and compression for odd devices + */ + ret = adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC, + ADF_SERVICES_ENABLED, config, + ADF_STR); + if (ret) + return ret; + + adf_heartbeat_save_cfg_param(accel_dev, ADF_CFG_HB_TIMER_MIN_MS); + + return 0; +} +EXPORT_SYMBOL_GPL(adf_gen4_cfg_dev_init); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_config.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_config.h new file mode 100644 index 0000000000..bb87655f69 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_config.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2023 Intel Corporation */ +#ifndef ADF_GEN4_CONFIG_H_ +#define ADF_GEN4_CONFIG_H_ + +#include "adf_accel_devices.h" + +int adf_gen4_dev_config(struct adf_accel_dev *accel_dev); +int adf_gen4_cfg_dev_init(struct adf_accel_dev *accel_dev); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index 3148a62938..f752653ccb 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -2,8 +2,10 @@ /* Copyright(c) 2020 Intel Corporation */ #include <linux/iopoll.h> #include "adf_accel_devices.h" +#include "adf_cfg_services.h" #include "adf_common_drv.h" #include "adf_gen4_hw_data.h" +#include "adf_gen4_pm.h" static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) { @@ -102,6 +104,131 @@ void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) } EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); +u32 adf_gen4_get_accel_mask(struct adf_hw_device_data *self) +{ + return ADF_GEN4_ACCELERATORS_MASK; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_accel_mask); + +u32 adf_gen4_get_num_accels(struct adf_hw_device_data *self) +{ + 
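The effect of adf_gen4_cfg_dev_init() is a deterministic out-of-the-box service split: even accel_ids default to crypto, odd ones to compression. Assuming ADF_CFG_CY and ADF_CFG_DC carry the usual service strings from adf_cfg_strings.h ("sym;asym" and "dc" - an assumption, they are not shown in this patch), a two-device system comes up as:

        accel_id 0 -> ServicesEnabled = "sym;asym"    /* crypto */
        accel_id 1 -> ServicesEnabled = "dc"          /* compression */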
return ADF_GEN4_MAX_ACCELERATORS; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_num_accels); + +u32 adf_gen4_get_num_aes(struct adf_hw_device_data *self) +{ + if (!self || !self->ae_mask) + return 0; + + return hweight32(self->ae_mask); +} +EXPORT_SYMBOL_GPL(adf_gen4_get_num_aes); + +u32 adf_gen4_get_misc_bar_id(struct adf_hw_device_data *self) +{ + return ADF_GEN4_PMISC_BAR; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_misc_bar_id); + +u32 adf_gen4_get_etr_bar_id(struct adf_hw_device_data *self) +{ + return ADF_GEN4_ETR_BAR; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_etr_bar_id); + +u32 adf_gen4_get_sram_bar_id(struct adf_hw_device_data *self) +{ + return ADF_GEN4_SRAM_BAR; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_sram_bar_id); + +enum dev_sku_info adf_gen4_get_sku(struct adf_hw_device_data *self) +{ + return DEV_SKU_1; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_sku); + +void adf_gen4_get_arb_info(struct arb_info *arb_info) +{ + arb_info->arb_cfg = ADF_GEN4_ARB_CONFIG; + arb_info->arb_offset = ADF_GEN4_ARB_OFFSET; + arb_info->wt2sam_offset = ADF_GEN4_ARB_WRK_2_SER_MAP_OFFSET; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_arb_info); + +void adf_gen4_get_admin_info(struct admin_info *admin_csrs_info) +{ + admin_csrs_info->mailbox_offset = ADF_GEN4_MAILBOX_BASE_OFFSET; + admin_csrs_info->admin_msg_ur = ADF_GEN4_ADMINMSGUR_OFFSET; + admin_csrs_info->admin_msg_lr = ADF_GEN4_ADMINMSGLR_OFFSET; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_admin_info); + +u32 adf_gen4_get_heartbeat_clock(struct adf_hw_device_data *self) +{ + /* + * GEN4 uses KPT counter for HB + */ + return ADF_GEN4_KPT_COUNTER_FREQ; +} +EXPORT_SYMBOL_GPL(adf_gen4_get_heartbeat_clock); + +void adf_gen4_enable_error_correction(struct adf_accel_dev *accel_dev) +{ + struct adf_bar *misc_bar = &GET_BARS(accel_dev)[ADF_GEN4_PMISC_BAR]; + void __iomem *csr = misc_bar->virt_addr; + + /* Enable all in errsou3 except VFLR notification on host */ + ADF_CSR_WR(csr, ADF_GEN4_ERRMSK3, ADF_GEN4_VFLNOTIFY); +} +EXPORT_SYMBOL_GPL(adf_gen4_enable_error_correction); + +void adf_gen4_enable_ints(struct adf_accel_dev *accel_dev) +{ + void __iomem *addr; + + addr = (&GET_BARS(accel_dev)[ADF_GEN4_PMISC_BAR])->virt_addr; + + /* Enable bundle interrupts */ + ADF_CSR_WR(addr, ADF_GEN4_SMIAPF_RP_X0_MASK_OFFSET, 0); + ADF_CSR_WR(addr, ADF_GEN4_SMIAPF_RP_X1_MASK_OFFSET, 0); + + /* Enable misc interrupts */ + ADF_CSR_WR(addr, ADF_GEN4_SMIAPF_MASK_OFFSET, 0); +} +EXPORT_SYMBOL_GPL(adf_gen4_enable_ints); + +int adf_gen4_init_device(struct adf_accel_dev *accel_dev) +{ + void __iomem *addr; + u32 status; + u32 csr; + int ret; + + addr = (&GET_BARS(accel_dev)[ADF_GEN4_PMISC_BAR])->virt_addr; + + /* Temporarily mask PM interrupt */ + csr = ADF_CSR_RD(addr, ADF_GEN4_ERRMSK2); + csr |= ADF_GEN4_PM_SOU; + ADF_CSR_WR(addr, ADF_GEN4_ERRMSK2, csr); + + /* Set DRV_ACTIVE bit to power up the device */ + ADF_CSR_WR(addr, ADF_GEN4_PM_INTERRUPT, ADF_GEN4_PM_DRV_ACTIVE); + + /* Poll status register to make sure the device is powered up */ + ret = read_poll_timeout(ADF_CSR_RD, status, + status & ADF_GEN4_PM_INIT_STATE, + ADF_GEN4_PM_POLL_DELAY_US, + ADF_GEN4_PM_POLL_TIMEOUT_US, true, addr, + ADF_GEN4_PM_STATUS); + if (ret) + dev_err(&GET_DEV(accel_dev), "Failed to power up the device\n"); + + return ret; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_device); + static inline void adf_gen4_unpack_ssm_wdtimer(u64 value, u32 *upper, u32 *lower) { @@ -135,6 +262,28 @@ void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev) } EXPORT_SYMBOL_GPL(adf_gen4_set_ssm_wdtimer); +/* + * The vector routing table is used to select the MSI-X entry to use for 
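adf_gen4_init_device() relies on read_poll_timeout() for the power-up handshake: set DRV_ACTIVE, then re-read ADF_GEN4_PM_STATUS until ADF_GEN4_PM_INIT_STATE appears or the timeout budget is spent. Roughly what the macro expands to here, as a sketch only (the real macro also honors the sleep-before-read flag and re-checks the condition once after timeout):

        ktime_t end = ktime_add_us(ktime_get(), ADF_GEN4_PM_POLL_TIMEOUT_US);

        for (;;) {
                status = ADF_CSR_RD(addr, ADF_GEN4_PM_STATUS);
                if (status & ADF_GEN4_PM_INIT_STATE)
                        return 0;
                if (ktime_after(ktime_get(), end))
                        return -ETIMEDOUT;
                usleep_range(ADF_GEN4_PM_POLL_DELAY_US,
                             ADF_GEN4_PM_POLL_DELAY_US + 1);
        }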
each + * interrupt source. + * The first ADF_GEN4_ETR_MAX_BANKS entries correspond to ring interrupts. + * The final entry corresponds to VF2PF or error interrupts. + * This vector table could be used to configure one MSI-X entry to be shared + * between multiple interrupt sources. + * + * The default routing is set to have a one to one correspondence between the + * interrupt source and the MSI-X entry used. + */ +void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev) +{ + void __iomem *csr; + int i; + + csr = (&GET_BARS(accel_dev)[ADF_GEN4_PMISC_BAR])->virt_addr; + for (i = 0; i <= ADF_GEN4_ETR_MAX_BANKS; i++) + ADF_CSR_WR(csr, ADF_GEN4_MSIX_RTTABLE_OFFSET(i), i); +} +EXPORT_SYMBOL_GPL(adf_gen4_set_msix_default_rttable); + int adf_pfvf_comms_disabled(struct adf_accel_dev *accel_dev) { return 0; @@ -192,3 +341,95 @@ int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number) return ret; } EXPORT_SYMBOL_GPL(adf_gen4_ring_pair_reset); + +static const u32 thrd_to_arb_map_dcc[] = { + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, 0x0000FFFF, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x00000000, 0x00000000, 0x00000000, 0x00000000, + 0x0 +}; + +static const u16 rp_group_to_arb_mask[] = { + [RP_GROUP_0] = 0x5, + [RP_GROUP_1] = 0xA, +}; + +static bool is_single_service(int service_id) +{ + switch (service_id) { + case SVC_DC: + case SVC_SYM: + case SVC_ASYM: + return true; + case SVC_CY: + case SVC_CY2: + case SVC_DCC: + case SVC_ASYM_DC: + case SVC_DC_ASYM: + case SVC_SYM_DC: + case SVC_DC_SYM: + default: + return false; + } +} + +int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + u32 *thd2arb_map = hw_data->thd_to_arb_map; + unsigned int ae_cnt, worker_obj_cnt, i, j; + unsigned long ae_mask, thds_mask; + int srv_id, rp_group; + u32 thd2arb_map_base; + u16 arb_mask; + + if (!hw_data->get_rp_group || !hw_data->get_ena_thd_mask || + !hw_data->get_num_aes || !hw_data->uof_get_num_objs || + !hw_data->uof_get_ae_mask) + return -EFAULT; + + srv_id = adf_get_service_enabled(accel_dev); + if (srv_id < 0) + return srv_id; + + ae_cnt = hw_data->get_num_aes(hw_data); + worker_obj_cnt = hw_data->uof_get_num_objs(accel_dev) - + ADF_GEN4_ADMIN_ACCELENGINES; + + if (srv_id == SVC_DCC) { + if (ae_cnt > ICP_QAT_HW_AE_DELIMITER) + return -EINVAL; + + memcpy(thd2arb_map, thrd_to_arb_map_dcc, + array_size(sizeof(*thd2arb_map), ae_cnt)); + return 0; + } + + for (i = 0; i < worker_obj_cnt; i++) { + ae_mask = hw_data->uof_get_ae_mask(accel_dev, i); + rp_group = hw_data->get_rp_group(accel_dev, ae_mask); + thds_mask = hw_data->get_ena_thd_mask(accel_dev, i); + thd2arb_map_base = 0; + + if (rp_group >= RP_GROUP_COUNT || rp_group < RP_GROUP_0) + return -EINVAL; + + if (thds_mask == ADF_GEN4_ENA_THD_MASK_ERROR) + return -EINVAL; + + if (is_single_service(srv_id)) + arb_mask = rp_group_to_arb_mask[RP_GROUP_0] | + rp_group_to_arb_mask[RP_GROUP_1]; + else + arb_mask = rp_group_to_arb_mask[rp_group]; + + for_each_set_bit(j, &thds_mask, ADF_NUM_THREADS_PER_AE) + thd2arb_map_base |= arb_mask << (j * 4); + + for_each_set_bit(j, &ae_mask, ae_cnt) + thd2arb_map[j] = thd2arb_map_base; + } + return 0; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_thd2arb_map); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index 1813fe1d5a..7d8a774cad 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h 
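adf_gen4_init_thd2arb_map() packs one 4-bit arbiter mask per enabled thread into each AE's 32-bit map entry. A worked example for a non-single-service configuration where an object sits on ring-pair group 0 (arb mask 0x5), assuming its thread mask enables threads 0-3 (the mask value is illustrative):

        unsigned long thds_mask = 0xF;  /* assumed get_ena_thd_mask() result */
        u16 arb_mask = 0x5;             /* rp_group_to_arb_mask[RP_GROUP_0] */
        u32 map = 0;
        int j;

        for_each_set_bit(j, &thds_mask, ADF_NUM_THREADS_PER_AE)
                map |= arb_mask << (j * 4);

        /* map == 0x5555: every nibble has bits 0 and 2 set, so each of
         * threads 0-3 may be served by arbiters 0 and 2 of its group */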
+++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -3,9 +3,57 @@ #ifndef ADF_GEN4_HW_CSR_DATA_H_ #define ADF_GEN4_HW_CSR_DATA_H_ +#include <linux/units.h> + #include "adf_accel_devices.h" #include "adf_cfg_common.h" +/* PCIe configuration space */ +#define ADF_GEN4_BAR_MASK (BIT(0) | BIT(2) | BIT(4)) +#define ADF_GEN4_SRAM_BAR 0 +#define ADF_GEN4_PMISC_BAR 1 +#define ADF_GEN4_ETR_BAR 2 + +/* Clocks frequency */ +#define ADF_GEN4_KPT_COUNTER_FREQ (100 * HZ_PER_MHZ) + +/* Physical function fuses */ +#define ADF_GEN4_FUSECTL0_OFFSET 0x2C8 +#define ADF_GEN4_FUSECTL1_OFFSET 0x2CC +#define ADF_GEN4_FUSECTL2_OFFSET 0x2D0 +#define ADF_GEN4_FUSECTL3_OFFSET 0x2D4 +#define ADF_GEN4_FUSECTL4_OFFSET 0x2D8 +#define ADF_GEN4_FUSECTL5_OFFSET 0x2DC + +/* Accelerators */ +#define ADF_GEN4_ACCELERATORS_MASK 0x1 +#define ADF_GEN4_MAX_ACCELERATORS 1 +#define ADF_GEN4_ADMIN_ACCELENGINES 1 + +/* MSIX interrupt */ +#define ADF_GEN4_SMIAPF_RP_X0_MASK_OFFSET 0x41A040 +#define ADF_GEN4_SMIAPF_RP_X1_MASK_OFFSET 0x41A044 +#define ADF_GEN4_SMIAPF_MASK_OFFSET 0x41A084 +#define ADF_GEN4_MSIX_RTTABLE_OFFSET(i) (0x409000 + ((i) * 0x04)) + +/* Bank and ring configuration */ +#define ADF_GEN4_MAX_RPS 64 +#define ADF_GEN4_NUM_RINGS_PER_BANK 2 +#define ADF_GEN4_NUM_BANKS_PER_VF 4 +#define ADF_GEN4_ETR_MAX_BANKS 64 +#define ADF_GEN4_RX_RINGS_OFFSET 1 +#define ADF_GEN4_TX_RINGS_MASK 0x1 + +/* Arbiter configuration */ +#define ADF_GEN4_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0)) +#define ADF_GEN4_ARB_OFFSET 0x0 +#define ADF_GEN4_ARB_WRK_2_SER_MAP_OFFSET 0x400 + +/* Admin Interface Reg Offset */ +#define ADF_GEN4_ADMINMSGUR_OFFSET 0x500574 +#define ADF_GEN4_ADMINMSGLR_OFFSET 0x500578 +#define ADF_GEN4_MAILBOX_BASE_OFFSET 0x600970 + /* Transport access */ #define ADF_BANK_INT_SRC_SEL_MASK 0x44UL #define ADF_RING_CSR_RING_CONFIG 0x1000 @@ -146,7 +194,46 @@ do { \ #define ADF_GEN4_RL_TOKEN_PCIEIN_BUCKET_OFFSET 0x508800 #define ADF_GEN4_RL_TOKEN_PCIEOUT_BUCKET_OFFSET 0x508804 +/* Arbiter threads mask with error value */ +#define ADF_GEN4_ENA_THD_MASK_ERROR GENMASK(ADF_NUM_THREADS_PER_AE, 0) + void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); + +enum icp_qat_gen4_slice_mask { + ICP_ACCEL_GEN4_MASK_CIPHER_SLICE = BIT(0), + ICP_ACCEL_GEN4_MASK_AUTH_SLICE = BIT(1), + ICP_ACCEL_GEN4_MASK_PKE_SLICE = BIT(2), + ICP_ACCEL_GEN4_MASK_COMPRESS_SLICE = BIT(3), + ICP_ACCEL_GEN4_MASK_UCS_SLICE = BIT(4), + ICP_ACCEL_GEN4_MASK_EIA3_SLICE = BIT(5), + ICP_ACCEL_GEN4_MASK_SMX_SLICE = BIT(7), + ICP_ACCEL_GEN4_MASK_WCP_WAT_SLICE = BIT(8), + ICP_ACCEL_GEN4_MASK_ZUC_256_SLICE = BIT(9), +}; + +enum adf_gen4_rp_groups { + RP_GROUP_0, + RP_GROUP_1, + RP_GROUP_COUNT +}; + +void adf_gen4_enable_error_correction(struct adf_accel_dev *accel_dev); +void adf_gen4_enable_ints(struct adf_accel_dev *accel_dev); +u32 adf_gen4_get_accel_mask(struct adf_hw_device_data *self); +void adf_gen4_get_admin_info(struct admin_info *admin_csrs_info); +void adf_gen4_get_arb_info(struct arb_info *arb_info); +u32 adf_gen4_get_etr_bar_id(struct adf_hw_device_data *self); +u32 adf_gen4_get_heartbeat_clock(struct adf_hw_device_data *self); +u32 adf_gen4_get_misc_bar_id(struct adf_hw_device_data *self); +u32 adf_gen4_get_num_accels(struct adf_hw_device_data *self); +u32 adf_gen4_get_num_aes(struct adf_hw_device_data *self); +enum dev_sku_info adf_gen4_get_sku(struct adf_hw_device_data *self); +u32 adf_gen4_get_sram_bar_id(struct adf_hw_device_data *self); +int adf_gen4_init_device(struct adf_accel_dev *accel_dev); void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops 
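Why GENMASK(ADF_NUM_THREADS_PER_AE, 0) works as an in-band error marker: it spans one bit more than any legal per-AE thread mask, so no valid get_ena_thd_mask() result can collide with it. Assuming eight threads per AE (ADF_NUM_THREADS_PER_AE is defined elsewhere in the driver):

        valid thread masks: bits 7:0  -> 0x000 .. 0x0FF
        error marker:       GENMASK(8, 0) == 0x1FF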
*csr_ops); int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number); +void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev); +void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); +int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev); + #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c new file mode 100644 index 0000000000..7fc7a77f6a --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2023 Intel Corporation. */ +#include <linux/export.h> +#include <linux/kernel.h> + +#include "adf_gen4_tl.h" +#include "adf_telemetry.h" +#include "adf_tl_debugfs.h" + +#define ADF_GEN4_TL_DEV_REG_OFF(reg) ADF_TL_DEV_REG_OFF(reg, gen4) + +#define ADF_GEN4_TL_RP_REG_OFF(reg) ADF_TL_RP_REG_OFF(reg, gen4) + +#define ADF_GEN4_TL_SL_UTIL_COUNTER(_name) \ + ADF_TL_COUNTER("util_" #_name, \ + ADF_TL_SIMPLE_COUNT, \ + ADF_TL_SLICE_REG_OFF(_name, reg_tm_slice_util, gen4)) + +#define ADF_GEN4_TL_SL_EXEC_COUNTER(_name) \ + ADF_TL_COUNTER("exec_" #_name, \ + ADF_TL_SIMPLE_COUNT, \ + ADF_TL_SLICE_REG_OFF(_name, reg_tm_slice_exec_cnt, gen4)) + +/* Device level counters. */ +static const struct adf_tl_dbg_counter dev_counters[] = { + /* PCIe partial transactions. */ + ADF_TL_COUNTER(PCI_TRANS_CNT_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_pci_trans_cnt)), + /* Max read latency[ns]. */ + ADF_TL_COUNTER(MAX_RD_LAT_NAME, ADF_TL_COUNTER_NS, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_rd_lat_max)), + /* Read latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(RD_LAT_ACC_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_rd_lat_acc), + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_rd_cmpl_cnt)), + /* Max get to put latency[ns]. */ + ADF_TL_COUNTER(MAX_LAT_NAME, ADF_TL_COUNTER_NS, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_gp_lat_max)), + /* Get to put latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(LAT_ACC_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_gp_lat_acc), + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_ae_put_cnt)), + /* PCIe write bandwidth[Mbps]. */ + ADF_TL_COUNTER(BW_IN_NAME, ADF_TL_COUNTER_MBPS, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_bw_in)), + /* PCIe read bandwidth[Mbps]. */ + ADF_TL_COUNTER(BW_OUT_NAME, ADF_TL_COUNTER_MBPS, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_bw_out)), + /* Page request latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(PAGE_REQ_LAT_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_at_page_req_lat_acc), + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_at_page_req_cnt)), + /* Page translation latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(AT_TRANS_LAT_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_at_trans_lat_acc), + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_at_trans_lat_cnt)), + /* Maximum uTLB used. */ + ADF_TL_COUNTER(AT_MAX_UTLB_USED_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN4_TL_DEV_REG_OFF(reg_tl_at_max_tlb_used)), +}; + +/* Slice utilization counters. */ +static const struct adf_tl_dbg_counter sl_util_counters[ADF_TL_SL_CNT_COUNT] = { + /* Compression slice utilization. */ + ADF_GEN4_TL_SL_UTIL_COUNTER(cpr), + /* Translator slice utilization. */ + ADF_GEN4_TL_SL_UTIL_COUNTER(xlt), + /* Decompression slice utilization. */ + ADF_GEN4_TL_SL_UTIL_COUNTER(dcpr), + /* PKE utilization. */ + ADF_GEN4_TL_SL_UTIL_COUNTER(pke), + /* Wireless Authentication slice utilization. */ + ADF_GEN4_TL_SL_UTIL_COUNTER(wat), + /* Wireless Cipher slice utilization. 
*/ + ADF_GEN4_TL_SL_UTIL_COUNTER(wcp), + /* UCS slice utilization. */ + ADF_GEN4_TL_SL_UTIL_COUNTER(ucs), + /* Cipher slice utilization. */ + ADF_GEN4_TL_SL_UTIL_COUNTER(cph), + /* Authentication slice utilization. */ + ADF_GEN4_TL_SL_UTIL_COUNTER(ath), +}; + +/* Slice execution counters. */ +static const struct adf_tl_dbg_counter sl_exec_counters[ADF_TL_SL_CNT_COUNT] = { + /* Compression slice execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(cpr), + /* Translator slice execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(xlt), + /* Decompression slice execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(dcpr), + /* PKE execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(pke), + /* Wireless Authentication slice execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(wat), + /* Wireless Cipher slice execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(wcp), + /* UCS slice execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(ucs), + /* Cipher slice execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(cph), + /* Authentication slice execution count. */ + ADF_GEN4_TL_SL_EXEC_COUNTER(ath), +}; + +/* Ring pair counters. */ +static const struct adf_tl_dbg_counter rp_counters[] = { + /* PCIe partial transactions. */ + ADF_TL_COUNTER(PCI_TRANS_CNT_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN4_TL_RP_REG_OFF(reg_tl_pci_trans_cnt)), + /* Get to put latency average[ns]. */ + ADF_TL_COUNTER_LATENCY(LAT_ACC_NAME, ADF_TL_COUNTER_NS_AVG, + ADF_GEN4_TL_RP_REG_OFF(reg_tl_gp_lat_acc), + ADF_GEN4_TL_RP_REG_OFF(reg_tl_ae_put_cnt)), + /* PCIe write bandwidth[Mbps]. */ + ADF_TL_COUNTER(BW_IN_NAME, ADF_TL_COUNTER_MBPS, + ADF_GEN4_TL_RP_REG_OFF(reg_tl_bw_in)), + /* PCIe read bandwidth[Mbps]. */ + ADF_TL_COUNTER(BW_OUT_NAME, ADF_TL_COUNTER_MBPS, + ADF_GEN4_TL_RP_REG_OFF(reg_tl_bw_out)), + /* Message descriptor DevTLB hit rate. */ + ADF_TL_COUNTER(AT_GLOB_DTLB_HIT_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN4_TL_RP_REG_OFF(reg_tl_at_glob_devtlb_hit)), + /* Message descriptor DevTLB miss rate. */ + ADF_TL_COUNTER(AT_GLOB_DTLB_MISS_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN4_TL_RP_REG_OFF(reg_tl_at_glob_devtlb_miss)), + /* Payload DevTLB hit rate. */ + ADF_TL_COUNTER(AT_PAYLD_DTLB_HIT_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN4_TL_RP_REG_OFF(reg_tl_at_payld_devtlb_hit)), + /* Payload DevTLB miss rate. */ + ADF_TL_COUNTER(AT_PAYLD_DTLB_MISS_NAME, ADF_TL_SIMPLE_COUNT, + ADF_GEN4_TL_RP_REG_OFF(reg_tl_at_payld_devtlb_miss)), +}; + +void adf_gen4_init_tl_data(struct adf_tl_hw_data *tl_data) +{ + tl_data->layout_sz = ADF_GEN4_TL_LAYOUT_SZ; + tl_data->slice_reg_sz = ADF_GEN4_TL_SLICE_REG_SZ; + tl_data->rp_reg_sz = ADF_GEN4_TL_RP_REG_SZ; + tl_data->num_hbuff = ADF_GEN4_TL_NUM_HIST_BUFFS; + tl_data->max_rp = ADF_GEN4_TL_MAX_RP_NUM; + tl_data->msg_cnt_off = ADF_GEN4_TL_MSG_CNT_OFF; + tl_data->cpp_ns_per_cycle = ADF_GEN4_CPP_NS_PER_CYCLE; + tl_data->bw_units_to_bytes = ADF_GEN4_TL_BW_HW_UNITS_TO_BYTES; + + tl_data->dev_counters = dev_counters; + tl_data->num_dev_counters = ARRAY_SIZE(dev_counters); + tl_data->sl_util_counters = sl_util_counters; + tl_data->sl_exec_counters = sl_exec_counters; + tl_data->rp_counters = rp_counters; + tl_data->num_rp_counters = ARRAY_SIZE(rp_counters); +} +EXPORT_SYMBOL_GPL(adf_gen4_init_tl_data); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.h new file mode 100644 index 0000000000..32df4163be --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_tl.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2023 Intel Corporation. 
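The ADF_TL_COUNTER_LATENCY entries pair an accumulator register with a sample-count register; the debugfs layer is then expected to derive the average and convert CPP cycles to nanoseconds using the cpp_ns_per_cycle factor set in adf_gen4_init_tl_data() above. A minimal sketch of that computation (helper name hypothetical; the exact scaling lives in the telemetry debugfs code, not in this hunk):

        #include <linux/math64.h>

        static u64 tl_avg_ns(u64 lat_acc, u32 sample_cnt, u32 cpp_ns_per_cycle)
        {
                if (!sample_cnt)
                        return 0;

                /* average CPP cycles per sample, scaled to nanoseconds */
                return div_u64(lat_acc, sample_cnt) * cpp_ns_per_cycle;
        }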
*/ +#ifndef ADF_GEN4_TL_H +#define ADF_GEN4_TL_H + +#include <linux/stddef.h> +#include <linux/types.h> + +struct adf_tl_hw_data; + +/* Computation constants. */ +#define ADF_GEN4_CPP_NS_PER_CYCLE 2 +#define ADF_GEN4_TL_BW_HW_UNITS_TO_BYTES 64 + +/* Maximum aggregation time. Value in milliseconds. */ +#define ADF_GEN4_TL_MAX_AGGR_TIME_MS 4000 +/* Num of buffers to store historic values. */ +#define ADF_GEN4_TL_NUM_HIST_BUFFS \ + (ADF_GEN4_TL_MAX_AGGR_TIME_MS / ADF_TL_DATA_WR_INTERVAL_MS) + +/* Max number of HW resources of one type. */ +#define ADF_GEN4_TL_MAX_SLICES_PER_TYPE 24 + +/* Max number of simultaneously monitored ring pairs. */ +#define ADF_GEN4_TL_MAX_RP_NUM 4 + +/** + * struct adf_gen4_tl_slice_data_regs - HW slice data as populated by FW. + * @reg_tm_slice_exec_cnt: Slice execution count. + * @reg_tm_slice_util: Slice utilization. + */ +struct adf_gen4_tl_slice_data_regs { + __u32 reg_tm_slice_exec_cnt; + __u32 reg_tm_slice_util; +}; + +#define ADF_GEN4_TL_SLICE_REG_SZ sizeof(struct adf_gen4_tl_slice_data_regs) + +/** + * struct adf_gen4_tl_device_data_regs - This structure stores device telemetry + * counter values as are being populated periodically by device. + * @reg_tl_rd_lat_acc: read latency accumulator + * @reg_tl_gp_lat_acc: get-put latency accumulator + * @reg_tl_at_page_req_lat_acc: AT/DevTLB page request latency accumulator + * @reg_tl_at_trans_lat_acc: DevTLB transaction latency accumulator + * @reg_tl_re_acc: accumulated ring empty time + * @reg_tl_pci_trans_cnt: PCIe partial transactions + * @reg_tl_rd_lat_max: maximum logged read latency + * @reg_tl_rd_cmpl_cnt: read requests completed count + * @reg_tl_gp_lat_max: maximum logged get to put latency + * @reg_tl_ae_put_cnt: Accelerator Engine put counts across all rings + * @reg_tl_bw_in: PCIe write bandwidth + * @reg_tl_bw_out: PCIe read bandwidth + * @reg_tl_at_page_req_cnt: DevTLB page requests count + * @reg_tl_at_trans_lat_cnt: DevTLB transaction latency samples count + * @reg_tl_at_max_tlb_used: maximum uTLB used + * @reg_tl_re_cnt: ring empty time samples count + * @reserved: reserved + * @ath_slices: array of Authentication slices utilization registers + * @cph_slices: array of Cipher slices utilization registers + * @cpr_slices: array of Compression slices utilization registers + * @xlt_slices: array of Translator slices utilization registers + * @dcpr_slices: array of Decompression slices utilization registers + * @pke_slices: array of PKE slices utilization registers + * @ucs_slices: array of UCS slices utilization registers + * @wat_slices: array of Wireless Authentication slices utilization registers + * @wcp_slices: array of Wireless Cipher slices utilization registers + */ +struct adf_gen4_tl_device_data_regs { + __u64 reg_tl_rd_lat_acc; + __u64 reg_tl_gp_lat_acc; + __u64 reg_tl_at_page_req_lat_acc; + __u64 reg_tl_at_trans_lat_acc; + __u64 reg_tl_re_acc; + __u32 reg_tl_pci_trans_cnt; + __u32 reg_tl_rd_lat_max; + __u32 reg_tl_rd_cmpl_cnt; + __u32 reg_tl_gp_lat_max; + __u32 reg_tl_ae_put_cnt; + __u32 reg_tl_bw_in; + __u32 reg_tl_bw_out; + __u32 reg_tl_at_page_req_cnt; + __u32 reg_tl_at_trans_lat_cnt; + __u32 reg_tl_at_max_tlb_used; + __u32 reg_tl_re_cnt; + __u32 reserved; + struct adf_gen4_tl_slice_data_regs ath_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; + struct adf_gen4_tl_slice_data_regs cph_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; + struct adf_gen4_tl_slice_data_regs cpr_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; + struct adf_gen4_tl_slice_data_regs xlt_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; + 
struct adf_gen4_tl_slice_data_regs dcpr_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; + struct adf_gen4_tl_slice_data_regs pke_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; + struct adf_gen4_tl_slice_data_regs ucs_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; + struct adf_gen4_tl_slice_data_regs wat_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; + struct adf_gen4_tl_slice_data_regs wcp_slices[ADF_GEN4_TL_MAX_SLICES_PER_TYPE]; +}; + +/** + * struct adf_gen4_tl_ring_pair_data_regs - This structure stores Ring Pair + * telemetry counter values as are being populated periodically by device. + * @reg_tl_gp_lat_acc: get-put latency accumulator + * @reserved: reserved + * @reg_tl_pci_trans_cnt: PCIe partial transactions + * @reg_tl_ae_put_cnt: Accelerator Engine put counts across all rings + * @reg_tl_bw_in: PCIe write bandwidth + * @reg_tl_bw_out: PCIe read bandwidth + * @reg_tl_at_glob_devtlb_hit: Message descriptor DevTLB hit rate + * @reg_tl_at_glob_devtlb_miss: Message descriptor DevTLB miss rate + * @reg_tl_at_payld_devtlb_hit: Payload DevTLB hit rate + * @reg_tl_at_payld_devtlb_miss: Payload DevTLB miss rate + * @reg_tl_re_cnt: ring empty time samples count + * @reserved1: reserved + */ +struct adf_gen4_tl_ring_pair_data_regs { + __u64 reg_tl_gp_lat_acc; + __u64 reserved; + __u32 reg_tl_pci_trans_cnt; + __u32 reg_tl_ae_put_cnt; + __u32 reg_tl_bw_in; + __u32 reg_tl_bw_out; + __u32 reg_tl_at_glob_devtlb_hit; + __u32 reg_tl_at_glob_devtlb_miss; + __u32 reg_tl_at_payld_devtlb_hit; + __u32 reg_tl_at_payld_devtlb_miss; + __u32 reg_tl_re_cnt; + __u32 reserved1; +}; + +#define ADF_GEN4_TL_RP_REG_SZ sizeof(struct adf_gen4_tl_ring_pair_data_regs) + +/** + * struct adf_gen4_tl_layout - This structure represents entire telemetry + * counters data: Device + 4 Ring Pairs as are being populated periodically + * by device. + * @tl_device_data_regs: structure of device telemetry registers + * @tl_ring_pairs_data_regs: array of ring pairs telemetry registers + * @reg_tl_msg_cnt: telemetry messages counter + * @reserved: reserved + */ +struct adf_gen4_tl_layout { + struct adf_gen4_tl_device_data_regs tl_device_data_regs; + struct adf_gen4_tl_ring_pair_data_regs + tl_ring_pairs_data_regs[ADF_GEN4_TL_MAX_RP_NUM]; + __u32 reg_tl_msg_cnt; + __u32 reserved; +}; + +#define ADF_GEN4_TL_LAYOUT_SZ sizeof(struct adf_gen4_tl_layout) +#define ADF_GEN4_TL_MSG_CNT_OFF offsetof(struct adf_gen4_tl_layout, reg_tl_msg_cnt) + +#ifdef CONFIG_DEBUG_FS +void adf_gen4_init_tl_data(struct adf_tl_hw_data *tl_data); +#else +static inline void adf_gen4_init_tl_data(struct adf_tl_hw_data *tl_data) +{ +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* ADF_GEN4_TL_H */ diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c b/drivers/crypto/intel/qat/qat_common/adf_init.c index 81c39f3d07..f43ae91115 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_init.c +++ b/drivers/crypto/intel/qat/qat_common/adf_init.c @@ -11,6 +11,7 @@ #include "adf_heartbeat.h" #include "adf_rl.h" #include "adf_sysfs_ras_counters.h" +#include "adf_telemetry.h" static LIST_HEAD(service_table); static DEFINE_MUTEX(service_lock); @@ -142,6 +143,10 @@ static int adf_dev_init(struct adf_accel_dev *accel_dev) if (ret && ret != -EOPNOTSUPP) return ret; + ret = adf_tl_init(accel_dev); + if (ret && ret != -EOPNOTSUPP) + return ret; + /* * Subservice initialisation is divided into two stages: init and start. 
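The history-buffer depth follows directly from the constants above: with a maximum aggregation window of ADF_GEN4_TL_MAX_AGGR_TIME_MS (4000 ms) and the device refreshing the DMA region every ADF_TL_DATA_WR_INTERVAL_MS (1000 ms, defined in adf_telemetry.h below), the driver keeps four snapshots:

        ADF_GEN4_TL_NUM_HIST_BUFFS
                = ADF_GEN4_TL_MAX_AGGR_TIME_MS / ADF_TL_DATA_WR_INTERVAL_MS
                = 4000 / 1000 = 4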
* This is to facilitate any ordering dependencies between services @@ -220,6 +225,10 @@ static int adf_dev_start(struct adf_accel_dev *accel_dev) if (ret && ret != -EOPNOTSUPP) return ret; + ret = adf_tl_start(accel_dev); + if (ret && ret != -EOPNOTSUPP) + return ret; + list_for_each_entry(service, &service_table, list) { if (service->event_hld(accel_dev, ADF_EVENT_START)) { dev_err(&GET_DEV(accel_dev), @@ -279,6 +288,7 @@ static void adf_dev_stop(struct adf_accel_dev *accel_dev) !test_bit(ADF_STATUS_STARTING, &accel_dev->status)) return; + adf_tl_stop(accel_dev); adf_rl_stop(accel_dev); adf_dbgfs_rm(accel_dev); adf_sysfs_stop_ras(accel_dev); @@ -374,6 +384,8 @@ static void adf_dev_shutdown(struct adf_accel_dev *accel_dev) adf_heartbeat_shutdown(accel_dev); + adf_tl_shutdown(accel_dev); + hw_data->disable_iov(accel_dev); if (test_bit(ADF_STATUS_IRQ_ALLOCATED, &accel_dev->status)) { diff --git a/drivers/crypto/intel/qat/qat_common/adf_telemetry.c b/drivers/crypto/intel/qat/qat_common/adf_telemetry.c new file mode 100644 index 0000000000..2ff714d11b --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_telemetry.c @@ -0,0 +1,288 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2023 Intel Corporation. */ +#define dev_fmt(fmt) "Telemetry: " fmt + +#include <asm/errno.h> +#include <linux/atomic.h> +#include <linux/device.h> +#include <linux/dev_printk.h> +#include <linux/dma-mapping.h> +#include <linux/jiffies.h> +#include <linux/kernel.h> +#include <linux/mutex.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/workqueue.h> + +#include "adf_admin.h" +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "adf_telemetry.h" + +#define TL_IS_ZERO(input) ((input) == 0) + +static bool is_tl_supported(struct adf_accel_dev *accel_dev) +{ + u16 fw_caps = GET_HW_DATA(accel_dev)->fw_capabilities; + + return fw_caps & TL_CAPABILITY_BIT; +} + +static int validate_tl_data(struct adf_tl_hw_data *tl_data) +{ + if (!tl_data->dev_counters || + TL_IS_ZERO(tl_data->num_dev_counters) || + !tl_data->sl_util_counters || + !tl_data->sl_exec_counters || + !tl_data->rp_counters || + TL_IS_ZERO(tl_data->num_rp_counters)) + return -EOPNOTSUPP; + + return 0; +} + +static int adf_tl_alloc_mem(struct adf_accel_dev *accel_dev) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); + struct device *dev = &GET_DEV(accel_dev); + size_t regs_sz = tl_data->layout_sz; + struct adf_telemetry *telemetry; + int node = dev_to_node(dev); + void *tl_data_regs; + unsigned int i; + + telemetry = kzalloc_node(sizeof(*telemetry), GFP_KERNEL, node); + if (!telemetry) + return -ENOMEM; + + telemetry->rp_num_indexes = kmalloc_array(tl_data->max_rp, + sizeof(*telemetry->rp_num_indexes), + GFP_KERNEL); + if (!telemetry->rp_num_indexes) + goto err_free_tl; + + telemetry->regs_hist_buff = kmalloc_array(tl_data->num_hbuff, + sizeof(*telemetry->regs_hist_buff), + GFP_KERNEL); + if (!telemetry->regs_hist_buff) + goto err_free_rp_indexes; + + telemetry->regs_data = dma_alloc_coherent(dev, regs_sz, + &telemetry->regs_data_p, + GFP_KERNEL); + if (!telemetry->regs_data) + goto err_free_regs_hist_buff; + + for (i = 0; i < tl_data->num_hbuff; i++) { + tl_data_regs = kzalloc_node(regs_sz, GFP_KERNEL, node); + if (!tl_data_regs) + goto err_free_dma; + + telemetry->regs_hist_buff[i] = tl_data_regs; + } + + accel_dev->telemetry = telemetry; + + return 0; + +err_free_dma: + dma_free_coherent(dev, regs_sz, telemetry->regs_data, + telemetry->regs_data_p); + + while (i--) + 
kfree(telemetry->regs_hist_buff[i]); + +err_free_regs_hist_buff: + kfree(telemetry->regs_hist_buff); +err_free_rp_indexes: + kfree(telemetry->rp_num_indexes); +err_free_tl: + kfree(telemetry); + + return -ENOMEM; +} + +static void adf_tl_free_mem(struct adf_accel_dev *accel_dev) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); + struct adf_telemetry *telemetry = accel_dev->telemetry; + struct device *dev = &GET_DEV(accel_dev); + size_t regs_sz = tl_data->layout_sz; + unsigned int i; + + for (i = 0; i < tl_data->num_hbuff; i++) + kfree(telemetry->regs_hist_buff[i]); + + dma_free_coherent(dev, regs_sz, telemetry->regs_data, + telemetry->regs_data_p); + + kfree(telemetry->regs_hist_buff); + kfree(telemetry->rp_num_indexes); + kfree(telemetry); + accel_dev->telemetry = NULL; +} + +static unsigned long get_next_timeout(void) +{ + return msecs_to_jiffies(ADF_TL_TIMER_INT_MS); +} + +static void snapshot_regs(struct adf_telemetry *telemetry, size_t size) +{ + void *dst = telemetry->regs_hist_buff[telemetry->hb_num]; + void *src = telemetry->regs_data; + + memcpy(dst, src, size); +} + +static void tl_work_handler(struct work_struct *work) +{ + struct delayed_work *delayed_work; + struct adf_telemetry *telemetry; + struct adf_tl_hw_data *tl_data; + u32 msg_cnt, old_msg_cnt; + size_t layout_sz; + u32 *regs_data; + size_t id; + + delayed_work = to_delayed_work(work); + telemetry = container_of(delayed_work, struct adf_telemetry, work_ctx); + tl_data = &GET_TL_DATA(telemetry->accel_dev); + regs_data = telemetry->regs_data; + + id = tl_data->msg_cnt_off / sizeof(*regs_data); + layout_sz = tl_data->layout_sz; + + if (!atomic_read(&telemetry->state)) { + cancel_delayed_work_sync(&telemetry->work_ctx); + return; + } + + msg_cnt = regs_data[id]; + old_msg_cnt = msg_cnt; + if (msg_cnt == telemetry->msg_cnt) + goto out; + + mutex_lock(&telemetry->regs_hist_lock); + + snapshot_regs(telemetry, layout_sz); + + /* Check if data changed while updating it */ + msg_cnt = regs_data[id]; + if (old_msg_cnt != msg_cnt) + snapshot_regs(telemetry, layout_sz); + + telemetry->msg_cnt = msg_cnt; + telemetry->hb_num++; + telemetry->hb_num %= telemetry->hbuffs; + + mutex_unlock(&telemetry->regs_hist_lock); + +out: + adf_misc_wq_queue_delayed_work(&telemetry->work_ctx, get_next_timeout()); +} + +int adf_tl_halt(struct adf_accel_dev *accel_dev) +{ + struct adf_telemetry *telemetry = accel_dev->telemetry; + struct device *dev = &GET_DEV(accel_dev); + int ret; + + cancel_delayed_work_sync(&telemetry->work_ctx); + atomic_set(&telemetry->state, 0); + + ret = adf_send_admin_tl_stop(accel_dev); + if (ret) + dev_err(dev, "failed to stop telemetry\n"); + + return ret; +} + +int adf_tl_run(struct adf_accel_dev *accel_dev, int state) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); + struct adf_telemetry *telemetry = accel_dev->telemetry; + struct device *dev = &GET_DEV(accel_dev); + size_t layout_sz = tl_data->layout_sz; + int ret; + + ret = adf_send_admin_tl_start(accel_dev, telemetry->regs_data_p, + layout_sz, telemetry->rp_num_indexes, + &telemetry->slice_cnt); + if (ret) { + dev_err(dev, "failed to start telemetry\n"); + return ret; + } + + telemetry->hbuffs = state; + atomic_set(&telemetry->state, state); + + adf_misc_wq_queue_delayed_work(&telemetry->work_ctx, get_next_timeout()); + + return 0; +} + +int adf_tl_init(struct adf_accel_dev *accel_dev) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); + u8 max_rp = GET_TL_DATA(accel_dev).max_rp; + struct device *dev = &GET_DEV(accel_dev); + 
struct adf_telemetry *telemetry; + unsigned int i; + int ret; + + ret = validate_tl_data(tl_data); + if (ret) + return ret; + + ret = adf_tl_alloc_mem(accel_dev); + if (ret) { + dev_err(dev, "failed to initialize: %d\n", ret); + return ret; + } + + telemetry = accel_dev->telemetry; + telemetry->accel_dev = accel_dev; + + mutex_init(&telemetry->wr_lock); + mutex_init(&telemetry->regs_hist_lock); + INIT_DELAYED_WORK(&telemetry->work_ctx, tl_work_handler); + + for (i = 0; i < max_rp; i++) + telemetry->rp_num_indexes[i] = ADF_TL_RP_REGS_DISABLED; + + return 0; +} + +int adf_tl_start(struct adf_accel_dev *accel_dev) +{ + struct device *dev = &GET_DEV(accel_dev); + + if (!accel_dev->telemetry) + return -EOPNOTSUPP; + + if (!is_tl_supported(accel_dev)) { + dev_info(dev, "feature not supported by FW\n"); + adf_tl_free_mem(accel_dev); + return -EOPNOTSUPP; + } + + return 0; +} + +void adf_tl_stop(struct adf_accel_dev *accel_dev) +{ + if (!accel_dev->telemetry) + return; + + if (atomic_read(&accel_dev->telemetry->state)) + adf_tl_halt(accel_dev); +} + +void adf_tl_shutdown(struct adf_accel_dev *accel_dev) +{ + if (!accel_dev->telemetry) + return; + + adf_tl_free_mem(accel_dev); +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_telemetry.h b/drivers/crypto/intel/qat/qat_common/adf_telemetry.h new file mode 100644 index 0000000000..9be81cd3b8 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_telemetry.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2023 Intel Corporation. */ +#ifndef ADF_TELEMETRY_H +#define ADF_TELEMETRY_H + +#include <linux/bits.h> +#include <linux/mutex.h> +#include <linux/types.h> +#include <linux/workqueue.h> + +#include "icp_qat_fw_init_admin.h" + +struct adf_accel_dev; +struct adf_tl_dbg_counter; +struct dentry; + +#define ADF_TL_SL_CNT_COUNT \ + (sizeof(struct icp_qat_fw_init_admin_slice_cnt) / sizeof(__u8)) + +#define TL_CAPABILITY_BIT BIT(1) +/* Interval within which the device writes data to the DMA region. Value in milliseconds. */ +#define ADF_TL_DATA_WR_INTERVAL_MS 1000 +/* Interval within which the timer interrupt should be handled. Value in milliseconds.
*/ +#define ADF_TL_TIMER_INT_MS (ADF_TL_DATA_WR_INTERVAL_MS / 2) + +#define ADF_TL_RP_REGS_DISABLED (0xff) + +struct adf_tl_hw_data { + size_t layout_sz; + size_t slice_reg_sz; + size_t rp_reg_sz; + size_t msg_cnt_off; + const struct adf_tl_dbg_counter *dev_counters; + const struct adf_tl_dbg_counter *sl_util_counters; + const struct adf_tl_dbg_counter *sl_exec_counters; + const struct adf_tl_dbg_counter *rp_counters; + u8 num_hbuff; + u8 cpp_ns_per_cycle; + u8 bw_units_to_bytes; + u8 num_dev_counters; + u8 num_rp_counters; + u8 max_rp; +}; + +struct adf_telemetry { + struct adf_accel_dev *accel_dev; + atomic_t state; + u32 hbuffs; + int hb_num; + u32 msg_cnt; + dma_addr_t regs_data_p; /* bus address for DMA mapping */ + void *regs_data; /* virtual address for DMA mapping */ + /** + * @regs_hist_buff: array of pointers to copies of the last @hbuffs + * values of @regs_data + */ + void **regs_hist_buff; + struct dentry *dbg_dir; + u8 *rp_num_indexes; + /** + * @regs_hist_lock: protects from race conditions between write and read + * to the copies referenced by @regs_hist_buff + */ + struct mutex regs_hist_lock; + /** + * @wr_lock: protects from concurrent writes to debugfs telemetry files + */ + struct mutex wr_lock; + struct delayed_work work_ctx; + struct icp_qat_fw_init_admin_slice_cnt slice_cnt; +}; + +#ifdef CONFIG_DEBUG_FS +int adf_tl_init(struct adf_accel_dev *accel_dev); +int adf_tl_start(struct adf_accel_dev *accel_dev); +void adf_tl_stop(struct adf_accel_dev *accel_dev); +void adf_tl_shutdown(struct adf_accel_dev *accel_dev); +int adf_tl_run(struct adf_accel_dev *accel_dev, int state); +int adf_tl_halt(struct adf_accel_dev *accel_dev); +#else +static inline int adf_tl_init(struct adf_accel_dev *accel_dev) +{ + return 0; +} + +static inline int adf_tl_start(struct adf_accel_dev *accel_dev) +{ + return 0; +} + +static inline void adf_tl_stop(struct adf_accel_dev *accel_dev) +{ +} + +static inline void adf_tl_shutdown(struct adf_accel_dev *accel_dev) +{ +} +#endif /* CONFIG_DEBUG_FS */ +#endif /* ADF_TELEMETRY_H */ diff --git a/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c b/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c new file mode 100644 index 0000000000..c8241f5a0a --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.c @@ -0,0 +1,710 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2023 Intel Corporation. 
*/ +#define dev_fmt(fmt) "Telemetry debugfs: " fmt + +#include <linux/atomic.h> +#include <linux/debugfs.h> +#include <linux/dev_printk.h> +#include <linux/dcache.h> +#include <linux/file.h> +#include <linux/kernel.h> +#include <linux/math64.h> +#include <linux/mutex.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/units.h> + +#include "adf_accel_devices.h" +#include "adf_cfg_strings.h" +#include "adf_telemetry.h" +#include "adf_tl_debugfs.h" + +#define TL_VALUE_MIN_PADDING 20 +#define TL_KEY_MIN_PADDING 23 +#define TL_RP_SRV_UNKNOWN "Unknown" + +static int tl_collect_values_u32(struct adf_telemetry *telemetry, + size_t counter_offset, u64 *arr) +{ + unsigned int samples, hb_idx, i; + u32 *regs_hist_buff; + u32 counter_val; + + samples = min(telemetry->msg_cnt, telemetry->hbuffs); + hb_idx = telemetry->hb_num + telemetry->hbuffs - samples; + + mutex_lock(&telemetry->regs_hist_lock); + + for (i = 0; i < samples; i++) { + regs_hist_buff = telemetry->regs_hist_buff[hb_idx % telemetry->hbuffs]; + counter_val = regs_hist_buff[counter_offset / sizeof(counter_val)]; + arr[i] = counter_val; + hb_idx++; + } + + mutex_unlock(&telemetry->regs_hist_lock); + + return samples; +} + +static int tl_collect_values_u64(struct adf_telemetry *telemetry, + size_t counter_offset, u64 *arr) +{ + unsigned int samples, hb_idx, i; + u64 *regs_hist_buff; + u64 counter_val; + + samples = min(telemetry->msg_cnt, telemetry->hbuffs); + hb_idx = telemetry->hb_num + telemetry->hbuffs - samples; + + mutex_lock(&telemetry->regs_hist_lock); + + for (i = 0; i < samples; i++) { + regs_hist_buff = telemetry->regs_hist_buff[hb_idx % telemetry->hbuffs]; + counter_val = regs_hist_buff[counter_offset / sizeof(counter_val)]; + arr[i] = counter_val; + hb_idx++; + } + + mutex_unlock(&telemetry->regs_hist_lock); + + return samples; +} + +/** + * avg_array() - Return average of values within an array. + * @array: Array of values. + * @len: Number of elements. + * + * This algorithm computes average of an array without running into overflow. + * + * Return: average of values. + */ +#define avg_array(array, len) ( \ +{ \ + typeof(&(array)[0]) _array = (array); \ + __unqual_scalar_typeof(_array[0]) _x = 0; \ + __unqual_scalar_typeof(_array[0]) _y = 0; \ + __unqual_scalar_typeof(_array[0]) _a, _b; \ + typeof(len) _len = (len); \ + size_t _i; \ + \ + for (_i = 0; _i < _len; _i++) { \ + _a = _array[_i]; \ + _b = do_div(_a, _len); \ + _x += _a; \ + if (_y >= _len - _b) { \ + _x++; \ + _y -= _len - _b; \ + } else { \ + _y += _b; \ + } \ + } \ + do_div(_y, _len); \ + (_x + _y); \ +}) + +/* Calculation function for simple counter. */ +static int tl_calc_count(struct adf_telemetry *telemetry, + const struct adf_tl_dbg_counter *ctr, + struct adf_tl_dbg_aggr_values *vals) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(telemetry->accel_dev); + u64 *hist_vals; + int sample_cnt; + int ret = 0; + + hist_vals = kmalloc_array(tl_data->num_hbuff, sizeof(*hist_vals), + GFP_KERNEL); + if (!hist_vals) + return -ENOMEM; + + memset(vals, 0, sizeof(*vals)); + sample_cnt = tl_collect_values_u32(telemetry, ctr->offset1, hist_vals); + if (!sample_cnt) + goto out_free_hist_vals; + + vals->curr = hist_vals[sample_cnt - 1]; + vals->min = min_array(hist_vals, sample_cnt); + vals->max = max_array(hist_vals, sample_cnt); + vals->avg = avg_array(hist_vals, sample_cnt); + +out_free_hist_vals: + kfree(hist_vals); + return ret; +} + +/* Convert CPP bus cycles to ns. 
*/ +static int tl_cycles_to_ns(struct adf_telemetry *telemetry, + const struct adf_tl_dbg_counter *ctr, + struct adf_tl_dbg_aggr_values *vals) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(telemetry->accel_dev); + u8 cpp_ns_per_cycle = tl_data->cpp_ns_per_cycle; + int ret; + + ret = tl_calc_count(telemetry, ctr, vals); + if (ret) + return ret; + + vals->curr *= cpp_ns_per_cycle; + vals->min *= cpp_ns_per_cycle; + vals->max *= cpp_ns_per_cycle; + vals->avg *= cpp_ns_per_cycle; + + return 0; +} + +/* + * Compute latency cumulative average with division of accumulated value + * by sample count. Returned value is in ns. + */ +static int tl_lat_acc_avg(struct adf_telemetry *telemetry, + const struct adf_tl_dbg_counter *ctr, + struct adf_tl_dbg_aggr_values *vals) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(telemetry->accel_dev); + u8 cpp_ns_per_cycle = tl_data->cpp_ns_per_cycle; + u8 num_hbuff = tl_data->num_hbuff; + int sample_cnt, i; + u64 *hist_vals; + u64 *hist_cnt; + int ret = 0; + + hist_vals = kmalloc_array(num_hbuff, sizeof(*hist_vals), GFP_KERNEL); + if (!hist_vals) + return -ENOMEM; + + hist_cnt = kmalloc_array(num_hbuff, sizeof(*hist_cnt), GFP_KERNEL); + if (!hist_cnt) { + ret = -ENOMEM; + goto out_free_hist_vals; + } + + memset(vals, 0, sizeof(*vals)); + sample_cnt = tl_collect_values_u64(telemetry, ctr->offset1, hist_vals); + if (!sample_cnt) + goto out_free_hist_cnt; + + tl_collect_values_u32(telemetry, ctr->offset2, hist_cnt); + + for (i = 0; i < sample_cnt; i++) { + /* Avoid division by 0 if count is 0. */ + if (hist_cnt[i]) + hist_vals[i] = div_u64(hist_vals[i] * cpp_ns_per_cycle, + hist_cnt[i]); + else + hist_vals[i] = 0; + } + + vals->curr = hist_vals[sample_cnt - 1]; + vals->min = min_array(hist_vals, sample_cnt); + vals->max = max_array(hist_vals, sample_cnt); + vals->avg = avg_array(hist_vals, sample_cnt); + +out_free_hist_cnt: + kfree(hist_cnt); +out_free_hist_vals: + kfree(hist_vals); + return ret; +} + +/* Convert HW raw bandwidth units to Mbps. 
*/ +static int tl_bw_hw_units_to_mbps(struct adf_telemetry *telemetry, + const struct adf_tl_dbg_counter *ctr, + struct adf_tl_dbg_aggr_values *vals) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(telemetry->accel_dev); + u16 bw_hw_2_bits = tl_data->bw_units_to_bytes * BITS_PER_BYTE; + u64 *hist_vals; + int sample_cnt; + int ret = 0; + + hist_vals = kmalloc_array(tl_data->num_hbuff, sizeof(*hist_vals), + GFP_KERNEL); + if (!hist_vals) + return -ENOMEM; + + memset(vals, 0, sizeof(*vals)); + sample_cnt = tl_collect_values_u32(telemetry, ctr->offset1, hist_vals); + if (!sample_cnt) + goto out_free_hist_vals; + + vals->curr = div_u64(hist_vals[sample_cnt - 1] * bw_hw_2_bits, MEGA); + vals->min = div_u64(min_array(hist_vals, sample_cnt) * bw_hw_2_bits, MEGA); + vals->max = div_u64(max_array(hist_vals, sample_cnt) * bw_hw_2_bits, MEGA); + vals->avg = div_u64(avg_array(hist_vals, sample_cnt) * bw_hw_2_bits, MEGA); + +out_free_hist_vals: + kfree(hist_vals); + return ret; +} + +static void tl_seq_printf_counter(struct adf_telemetry *telemetry, + struct seq_file *s, const char *name, + struct adf_tl_dbg_aggr_values *vals) +{ + seq_printf(s, "%-*s", TL_KEY_MIN_PADDING, name); + seq_printf(s, "%*llu", TL_VALUE_MIN_PADDING, vals->curr); + if (atomic_read(&telemetry->state) > 1) { + seq_printf(s, "%*llu", TL_VALUE_MIN_PADDING, vals->min); + seq_printf(s, "%*llu", TL_VALUE_MIN_PADDING, vals->max); + seq_printf(s, "%*llu", TL_VALUE_MIN_PADDING, vals->avg); + } + seq_puts(s, "\n"); +} + +static int tl_calc_and_print_counter(struct adf_telemetry *telemetry, + struct seq_file *s, + const struct adf_tl_dbg_counter *ctr, + const char *name) +{ + const char *counter_name = name ? name : ctr->name; + enum adf_tl_counter_type type = ctr->type; + struct adf_tl_dbg_aggr_values vals; + int ret; + + switch (type) { + case ADF_TL_SIMPLE_COUNT: + ret = tl_calc_count(telemetry, ctr, &vals); + break; + case ADF_TL_COUNTER_NS: + ret = tl_cycles_to_ns(telemetry, ctr, &vals); + break; + case ADF_TL_COUNTER_NS_AVG: + ret = tl_lat_acc_avg(telemetry, ctr, &vals); + break; + case ADF_TL_COUNTER_MBPS: + ret = tl_bw_hw_units_to_mbps(telemetry, ctr, &vals); + break; + default: + return -EINVAL; + } + + if (ret) + return ret; + + tl_seq_printf_counter(telemetry, s, counter_name, &vals); + + return 0; +} + +static int tl_print_sl_counter(struct adf_telemetry *telemetry, + const struct adf_tl_dbg_counter *ctr, + struct seq_file *s, u8 cnt_id) +{ + size_t sl_regs_sz = GET_TL_DATA(telemetry->accel_dev).slice_reg_sz; + struct adf_tl_dbg_counter slice_ctr; + size_t offset_inc = cnt_id * sl_regs_sz; + char cnt_name[MAX_COUNT_NAME_SIZE]; + + snprintf(cnt_name, MAX_COUNT_NAME_SIZE, "%s%d", ctr->name, cnt_id); + slice_ctr = *ctr; + slice_ctr.offset1 += offset_inc; + + return tl_calc_and_print_counter(telemetry, s, &slice_ctr, cnt_name); +} + +static int tl_calc_and_print_sl_counters(struct adf_accel_dev *accel_dev, + struct seq_file *s, u8 cnt_type, u8 cnt_id) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); + struct adf_telemetry *telemetry = accel_dev->telemetry; + const struct adf_tl_dbg_counter *sl_tl_util_counters; + const struct adf_tl_dbg_counter *sl_tl_exec_counters; + const struct adf_tl_dbg_counter *ctr; + int ret; + + sl_tl_util_counters = tl_data->sl_util_counters; + sl_tl_exec_counters = tl_data->sl_exec_counters; + + ctr = &sl_tl_util_counters[cnt_type]; + + ret = tl_print_sl_counter(telemetry, ctr, s, cnt_id); + if (ret) { + dev_notice(&GET_DEV(accel_dev), + "invalid slice utilization counter type\n"); + 
return ret; + } + + ctr = &sl_tl_exec_counters[cnt_type]; + + ret = tl_print_sl_counter(telemetry, ctr, s, cnt_id); + if (ret) { + dev_notice(&GET_DEV(accel_dev), + "invalid slice execution counter type\n"); + return ret; + } + + return 0; +} + +static void tl_print_msg_cnt(struct seq_file *s, u32 msg_cnt) +{ + seq_printf(s, "%-*s", TL_KEY_MIN_PADDING, SNAPSHOT_CNT_MSG); + seq_printf(s, "%*u\n", TL_VALUE_MIN_PADDING, msg_cnt); +} + +static int tl_print_dev_data(struct adf_accel_dev *accel_dev, + struct seq_file *s) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); + struct adf_telemetry *telemetry = accel_dev->telemetry; + const struct adf_tl_dbg_counter *dev_tl_counters; + u8 num_dev_counters = tl_data->num_dev_counters; + u8 *sl_cnt = (u8 *)&telemetry->slice_cnt; + const struct adf_tl_dbg_counter *ctr; + unsigned int i; + int ret; + u8 j; + + if (!atomic_read(&telemetry->state)) { + dev_info(&GET_DEV(accel_dev), "not enabled\n"); + return -EPERM; + } + + dev_tl_counters = tl_data->dev_counters; + + tl_print_msg_cnt(s, telemetry->msg_cnt); + + /* Print device level telemetry. */ + for (i = 0; i < num_dev_counters; i++) { + ctr = &dev_tl_counters[i]; + ret = tl_calc_and_print_counter(telemetry, s, ctr, NULL); + if (ret) { + dev_notice(&GET_DEV(accel_dev), + "invalid counter type\n"); + return ret; + } + } + + /* Print per slice telemetry. */ + for (i = 0; i < ADF_TL_SL_CNT_COUNT; i++) { + for (j = 0; j < sl_cnt[i]; j++) { + ret = tl_calc_and_print_sl_counters(accel_dev, s, i, j); + if (ret) + return ret; + } + } + + return 0; +} + +static int tl_dev_data_show(struct seq_file *s, void *unused) +{ + struct adf_accel_dev *accel_dev = s->private; + + if (!accel_dev) + return -EINVAL; + + return tl_print_dev_data(accel_dev, s); +} +DEFINE_SHOW_ATTRIBUTE(tl_dev_data); + +static int tl_control_show(struct seq_file *s, void *unused) +{ + struct adf_accel_dev *accel_dev = s->private; + + if (!accel_dev) + return -EINVAL; + + seq_printf(s, "%d\n", atomic_read(&accel_dev->telemetry->state)); + + return 0; +} + +static ssize_t tl_control_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq_f = file->private_data; + struct adf_accel_dev *accel_dev; + struct adf_telemetry *telemetry; + struct adf_tl_hw_data *tl_data; + struct device *dev; + u32 input; + int ret; + + accel_dev = seq_f->private; + if (!accel_dev) + return -EINVAL; + + tl_data = &GET_TL_DATA(accel_dev); + telemetry = accel_dev->telemetry; + dev = &GET_DEV(accel_dev); + + mutex_lock(&telemetry->wr_lock); + + ret = kstrtou32_from_user(userbuf, count, 10, &input); + if (ret) + goto unlock_and_exit; + + if (input > tl_data->num_hbuff) { + dev_info(dev, "invalid control input\n"); + ret = -EINVAL; + goto unlock_and_exit; + } + + /* If input is 0, just stop telemetry. */ + if (!input) { + ret = adf_tl_halt(accel_dev); + if (!ret) + ret = count; + + goto unlock_and_exit; + } + + /* If TL is already enabled, stop it. 
*/ + if (atomic_read(&telemetry->state)) { + dev_info(dev, "already enabled, restarting.\n"); + ret = adf_tl_halt(accel_dev); + if (ret) + goto unlock_and_exit; + } + + ret = adf_tl_run(accel_dev, input); + if (ret) + goto unlock_and_exit; + + ret = count; + +unlock_and_exit: + mutex_unlock(&telemetry->wr_lock); + return ret; +} +DEFINE_SHOW_STORE_ATTRIBUTE(tl_control); + +static int get_rp_index_from_file(const struct file *f, u8 *rp_id, u8 rp_num) +{ + char alpha; + u8 index; + int ret; + + ret = sscanf(f->f_path.dentry->d_name.name, ADF_TL_RP_REGS_FNAME, &alpha); + if (ret != 1) + return -EINVAL; + + index = ADF_TL_DBG_RP_INDEX_ALPHA(alpha); + *rp_id = index; + + return 0; +} + +static int adf_tl_dbg_change_rp_index(struct adf_accel_dev *accel_dev, + unsigned int new_rp_num, + unsigned int rp_regs_index) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + struct adf_telemetry *telemetry = accel_dev->telemetry; + struct device *dev = &GET_DEV(accel_dev); + unsigned int i; + u8 curr_state; + int ret; + + if (new_rp_num >= hw_data->num_rps) { + dev_info(dev, "invalid Ring Pair number selected\n"); + return -EINVAL; + } + + for (i = 0; i < hw_data->tl_data.max_rp; i++) { + if (telemetry->rp_num_indexes[i] == new_rp_num) { + dev_info(dev, "RP nr: %d is already selected in slot rp_%c_data\n", + new_rp_num, ADF_TL_DBG_RP_ALPHA_INDEX(i)); + return 0; + } + } + + dev_dbg(dev, "selecting RP nr %u into slot rp_%c_data\n", + new_rp_num, ADF_TL_DBG_RP_ALPHA_INDEX(rp_regs_index)); + + curr_state = atomic_read(&telemetry->state); + + if (curr_state) { + ret = adf_tl_halt(accel_dev); + if (ret) + return ret; + + telemetry->rp_num_indexes[rp_regs_index] = new_rp_num; + + ret = adf_tl_run(accel_dev, curr_state); + if (ret) + return ret; + } else { + telemetry->rp_num_indexes[rp_regs_index] = new_rp_num; + } + + return 0; +} + +static void tl_print_rp_srv(struct adf_accel_dev *accel_dev, struct seq_file *s, + u8 rp_idx) +{ + u32 banks_per_vf = GET_HW_DATA(accel_dev)->num_banks_per_vf; + enum adf_cfg_service_type svc; + + seq_printf(s, "%-*s", TL_KEY_MIN_PADDING, RP_SERVICE_TYPE); + + svc = GET_SRV_TYPE(accel_dev, rp_idx % banks_per_vf); + switch (svc) { + case COMP: + seq_printf(s, "%*s\n", TL_VALUE_MIN_PADDING, ADF_CFG_DC); + break; + case SYM: + seq_printf(s, "%*s\n", TL_VALUE_MIN_PADDING, ADF_CFG_SYM); + break; + case ASYM: + seq_printf(s, "%*s\n", TL_VALUE_MIN_PADDING, ADF_CFG_ASYM); + break; + default: + seq_printf(s, "%*s\n", TL_VALUE_MIN_PADDING, TL_RP_SRV_UNKNOWN); + break; + } +} + +static int tl_print_rp_data(struct adf_accel_dev *accel_dev, struct seq_file *s, + u8 rp_regs_index) +{ + struct adf_tl_hw_data *tl_data = &GET_TL_DATA(accel_dev); + struct adf_telemetry *telemetry = accel_dev->telemetry; + const struct adf_tl_dbg_counter *rp_tl_counters; + u8 num_rp_counters = tl_data->num_rp_counters; + size_t rp_regs_sz = tl_data->rp_reg_sz; + struct adf_tl_dbg_counter ctr; + unsigned int i; + u8 rp_idx; + int ret; + + if (!atomic_read(&telemetry->state)) { + dev_info(&GET_DEV(accel_dev), "not enabled\n"); + return -EPERM; + } + + rp_tl_counters = tl_data->rp_counters; + rp_idx = telemetry->rp_num_indexes[rp_regs_index]; + + if (rp_idx == ADF_TL_RP_REGS_DISABLED) { + dev_info(&GET_DEV(accel_dev), "no RP number selected in rp_%c_data\n", + ADF_TL_DBG_RP_ALPHA_INDEX(rp_regs_index)); + return -EPERM; + } + + tl_print_msg_cnt(s, telemetry->msg_cnt); + seq_printf(s, "%-*s", TL_KEY_MIN_PADDING, RP_NUM_INDEX); + seq_printf(s, "%*d\n", TL_VALUE_MIN_PADDING, rp_idx); + 
tl_print_rp_srv(accel_dev, s, rp_idx); + + for (i = 0; i < num_rp_counters; i++) { + ctr = rp_tl_counters[i]; + ctr.offset1 += rp_regs_sz * rp_regs_index; + ctr.offset2 += rp_regs_sz * rp_regs_index; + ret = tl_calc_and_print_counter(telemetry, s, &ctr, NULL); + if (ret) { + dev_dbg(&GET_DEV(accel_dev), + "invalid RP counter type\n"); + return ret; + } + } + + return 0; +} + +static int tl_rp_data_show(struct seq_file *s, void *unused) +{ + struct adf_accel_dev *accel_dev = s->private; + u8 rp_regs_index; + u8 max_rp; + int ret; + + if (!accel_dev) + return -EINVAL; + + max_rp = GET_TL_DATA(accel_dev).max_rp; + ret = get_rp_index_from_file(s->file, &rp_regs_index, max_rp); + if (ret) { + dev_dbg(&GET_DEV(accel_dev), "invalid RP data file name\n"); + return ret; + } + + return tl_print_rp_data(accel_dev, s, rp_regs_index); +} + +static ssize_t tl_rp_data_write(struct file *file, const char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct seq_file *seq_f = file->private_data; + struct adf_accel_dev *accel_dev; + struct adf_telemetry *telemetry; + unsigned int new_rp_num; + u8 rp_regs_index; + u8 max_rp; + int ret; + + accel_dev = seq_f->private; + if (!accel_dev) + return -EINVAL; + + telemetry = accel_dev->telemetry; + max_rp = GET_TL_DATA(accel_dev).max_rp; + + mutex_lock(&telemetry->wr_lock); + + ret = get_rp_index_from_file(file, &rp_regs_index, max_rp); + if (ret) { + dev_dbg(&GET_DEV(accel_dev), "invalid RP data file name\n"); + goto unlock_and_exit; + } + + ret = kstrtou32_from_user(userbuf, count, 10, &new_rp_num); + if (ret) + goto unlock_and_exit; + + ret = adf_tl_dbg_change_rp_index(accel_dev, new_rp_num, rp_regs_index); + if (ret) + goto unlock_and_exit; + + ret = count; + +unlock_and_exit: + mutex_unlock(&telemetry->wr_lock); + return ret; +} +DEFINE_SHOW_STORE_ATTRIBUTE(tl_rp_data); + +void adf_tl_dbgfs_add(struct adf_accel_dev *accel_dev) +{ + struct adf_telemetry *telemetry = accel_dev->telemetry; + struct dentry *parent = accel_dev->debugfs_dir; + u8 max_rp = GET_TL_DATA(accel_dev).max_rp; + char name[ADF_TL_RP_REGS_FNAME_SIZE]; + struct dentry *dir; + unsigned int i; + + if (!telemetry) + return; + + dir = debugfs_create_dir("telemetry", parent); + accel_dev->telemetry->dbg_dir = dir; + debugfs_create_file("device_data", 0444, dir, accel_dev, &tl_dev_data_fops); + debugfs_create_file("control", 0644, dir, accel_dev, &tl_control_fops); + + for (i = 0; i < max_rp; i++) { + snprintf(name, sizeof(name), ADF_TL_RP_REGS_FNAME, + ADF_TL_DBG_RP_ALPHA_INDEX(i)); + debugfs_create_file(name, 0644, dir, accel_dev, &tl_rp_data_fops); + } +} + +void adf_tl_dbgfs_rm(struct adf_accel_dev *accel_dev) +{ + struct adf_telemetry *telemetry = accel_dev->telemetry; + struct dentry *dbg_dir; + + if (!telemetry) + return; + + dbg_dir = telemetry->dbg_dir; + + debugfs_remove_recursive(dbg_dir); + + if (atomic_read(&telemetry->state)) + adf_tl_halt(accel_dev); +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.h b/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.h new file mode 100644 index 0000000000..11cc9eae19 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_tl_debugfs.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2023 Intel Corporation. 
*/ +#ifndef ADF_TL_DEBUGFS_H +#define ADF_TL_DEBUGFS_H + +#include <linux/types.h> + +struct adf_accel_dev; + +#define MAX_COUNT_NAME_SIZE 32 +#define SNAPSHOT_CNT_MSG "sample_cnt" +#define RP_NUM_INDEX "rp_num" +#define PCI_TRANS_CNT_NAME "pci_trans_cnt" +#define MAX_RD_LAT_NAME "max_rd_lat" +#define RD_LAT_ACC_NAME "rd_lat_acc_avg" +#define MAX_LAT_NAME "max_gp_lat" +#define LAT_ACC_NAME "gp_lat_acc_avg" +#define BW_IN_NAME "bw_in" +#define BW_OUT_NAME "bw_out" +#define PAGE_REQ_LAT_NAME "at_page_req_lat_avg" +#define AT_TRANS_LAT_NAME "at_trans_lat_avg" +#define AT_MAX_UTLB_USED_NAME "at_max_tlb_used" +#define AT_GLOB_DTLB_HIT_NAME "at_glob_devtlb_hit" +#define AT_GLOB_DTLB_MISS_NAME "at_glob_devtlb_miss" +#define AT_PAYLD_DTLB_HIT_NAME "tl_at_payld_devtlb_hit" +#define AT_PAYLD_DTLB_MISS_NAME "tl_at_payld_devtlb_miss" +#define RP_SERVICE_TYPE "service_type" + +#define ADF_TL_DBG_RP_ALPHA_INDEX(index) ((index) + 'A') +#define ADF_TL_DBG_RP_INDEX_ALPHA(alpha) ((alpha) - 'A') + +#define ADF_TL_RP_REGS_FNAME "rp_%c_data" +#define ADF_TL_RP_REGS_FNAME_SIZE 16 + +#define ADF_TL_DATA_REG_OFF(reg, qat_gen) \ + offsetof(struct adf_##qat_gen##_tl_layout, reg) + +#define ADF_TL_DEV_REG_OFF(reg, qat_gen) \ + (ADF_TL_DATA_REG_OFF(tl_device_data_regs, qat_gen) + \ + offsetof(struct adf_##qat_gen##_tl_device_data_regs, reg)) + +#define ADF_TL_SLICE_REG_OFF(slice, reg, qat_gen) \ + (ADF_TL_DEV_REG_OFF(slice##_slices[0], qat_gen) + \ + offsetof(struct adf_##qat_gen##_tl_slice_data_regs, reg)) + +#define ADF_TL_RP_REG_OFF(reg, qat_gen) \ + (ADF_TL_DATA_REG_OFF(tl_ring_pairs_data_regs[0], qat_gen) + \ + offsetof(struct adf_##qat_gen##_tl_ring_pair_data_regs, reg)) + +/** + * enum adf_tl_counter_type - telemetry counter types + * @ADF_TL_COUNTER_UNSUPPORTED: unsupported counter + * @ADF_TL_SIMPLE_COUNT: simple counter + * @ADF_TL_COUNTER_NS: latency counter, value in ns + * @ADF_TL_COUNTER_NS_AVG: accumulated average latency counter, value in ns + * @ADF_TL_COUNTER_MBPS: bandwidth, value in MBps + */ +enum adf_tl_counter_type { + ADF_TL_COUNTER_UNSUPPORTED, + ADF_TL_SIMPLE_COUNT, + ADF_TL_COUNTER_NS, + ADF_TL_COUNTER_NS_AVG, + ADF_TL_COUNTER_MBPS, +}; + +/** + * struct adf_tl_dbg_counter - telemetry counter definition + * @name: name of the counter as printed in the report + * @type: type of the counter + * @offset1: offset of 1st register + * @offset2: offset of 2nd optional register + */ +struct adf_tl_dbg_counter { + const char *name; + enum adf_tl_counter_type type; + size_t offset1; + size_t offset2; +}; + +#define ADF_TL_COUNTER(_name, _type, _offset) \ +{ .name = _name, \ + .type = _type, \ + .offset1 = _offset \ +} + +#define ADF_TL_COUNTER_LATENCY(_name, _type, _offset1, _offset2) \ +{ .name = _name, \ + .type = _type, \ + .offset1 = _offset1, \ + .offset2 = _offset2 \ +} + +/* Telemetry counter aggregated values. */ +struct adf_tl_dbg_aggr_values { + u64 curr; + u64 min; + u64 max; + u64 avg; +}; + +/** + * adf_tl_dbgfs_add() - Add telemetry's debug fs entries. + * @accel_dev: Pointer to acceleration device. + * + * Creates telemetry's debug fs folder and attributes in QAT debug fs root. + */ +void adf_tl_dbgfs_add(struct adf_accel_dev *accel_dev); + +/** + * adf_tl_dbgfs_rm() - Remove telemetry's debug fs entries. + * @accel_dev: Pointer to acceleration device. + * + * Removes telemetry's debug fs folder and attributes from QAT debug fs root.
+ */ +void adf_tl_dbgfs_rm(struct adf_accel_dev *accel_dev); + +#endif /* ADF_TL_DEBUGFS_H */ diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h index cd418b51d9..63cf18e2a4 100644 --- a/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h +++ b/drivers/crypto/intel/qat/qat_common/icp_qat_fw_init_admin.h @@ -29,6 +29,8 @@ enum icp_qat_fw_init_admin_cmd_id { ICP_QAT_FW_RL_ADD = 134, ICP_QAT_FW_RL_UPDATE = 135, ICP_QAT_FW_RL_REMOVE = 136, + ICP_QAT_FW_TL_START = 137, + ICP_QAT_FW_TL_STOP = 138, }; enum icp_qat_fw_init_admin_resp_status { @@ -36,6 +38,13 @@ enum icp_qat_fw_init_admin_resp_status { ICP_QAT_FW_INIT_RESP_STATUS_FAIL }; +struct icp_qat_fw_init_admin_tl_rp_indexes { + __u8 rp_num_index_0; + __u8 rp_num_index_1; + __u8 rp_num_index_2; + __u8 rp_num_index_3; +}; + struct icp_qat_fw_init_admin_slice_cnt { __u8 cpr_cnt; __u8 xlt_cnt; @@ -87,6 +96,7 @@ struct icp_qat_fw_init_admin_req { __u8 rp_count; }; __u32 idle_filter; + struct icp_qat_fw_init_admin_tl_rp_indexes rp_indexes; }; __u32 resrvd4; diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h b/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h index eb2ef225bc..b8f1c4ffb8 100644 --- a/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h +++ b/drivers/crypto/intel/qat/qat_common/icp_qat_hw.h @@ -18,7 +18,12 @@ enum icp_qat_hw_ae_id { ICP_QAT_HW_AE_9 = 9, ICP_QAT_HW_AE_10 = 10, ICP_QAT_HW_AE_11 = 11, - ICP_QAT_HW_AE_DELIMITER = 12 + ICP_QAT_HW_AE_12 = 12, + ICP_QAT_HW_AE_13 = 13, + ICP_QAT_HW_AE_14 = 14, + ICP_QAT_HW_AE_15 = 15, + ICP_QAT_HW_AE_16 = 16, + ICP_QAT_HW_AE_DELIMITER = 17 }; enum icp_qat_hw_qat_id { @@ -95,7 +100,7 @@ enum icp_qat_capabilities_mask { /* Bits 10-11 are currently reserved */ ICP_ACCEL_CAPABILITIES_HKDF = BIT(12), ICP_ACCEL_CAPABILITIES_ECEDMONT = BIT(13), - /* Bit 14 is currently reserved */ + ICP_ACCEL_CAPABILITIES_EXT_ALGCHAIN = BIT(14), ICP_ACCEL_CAPABILITIES_SHA3_EXT = BIT(15), ICP_ACCEL_CAPABILITIES_AESGCM_SPC = BIT(16), ICP_ACCEL_CAPABILITIES_CHACHA_POLY = BIT(17), @@ -107,7 +112,10 @@ enum icp_qat_capabilities_mask { ICP_ACCEL_CAPABILITIES_CNV_INTEGRITY64 = BIT(23), ICP_ACCEL_CAPABILITIES_LZ4_COMPRESSION = BIT(24), ICP_ACCEL_CAPABILITIES_LZ4S_COMPRESSION = BIT(25), - ICP_ACCEL_CAPABILITIES_AES_V2 = BIT(26) + ICP_ACCEL_CAPABILITIES_AES_V2 = BIT(26), + /* Bits 27-28 are currently reserved */ + ICP_ACCEL_CAPABILITIES_ZUC_256 = BIT(29), + ICP_ACCEL_CAPABILITIES_WIRELESS_CRYPTO_EXT = BIT(30), }; #define QAT_AUTH_MODE_BITPOS 4 diff --git a/drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h b/drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h index 69482abdb8..e28241bdd0 100644 --- a/drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h +++ b/drivers/crypto/intel/qat/qat_common/icp_qat_uclo.h @@ -7,7 +7,7 @@ #define ICP_QAT_AC_C62X_DEV_TYPE 0x01000000 #define ICP_QAT_AC_C3XXX_DEV_TYPE 0x02000000 #define ICP_QAT_AC_4XXX_A_DEV_TYPE 0x08000000 -#define ICP_QAT_UCLO_MAX_AE 12 +#define ICP_QAT_UCLO_MAX_AE 17 #define ICP_QAT_UCLO_MAX_CTX 8 #define ICP_QAT_UCLO_MAX_UIMAGE (ICP_QAT_UCLO_MAX_AE * ICP_QAT_UCLO_MAX_CTX) #define ICP_QAT_UCLO_MAX_USTORE 0x4000 diff --git a/drivers/crypto/intel/qat/qat_common/qat_hal.c b/drivers/crypto/intel/qat/qat_common/qat_hal.c index cbb946a800..317cafa9d1 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_hal.c +++ b/drivers/crypto/intel/qat/qat_common/qat_hal.c @@ -697,12 +697,16 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle, case ADF_4XXX_PCI_DEVICE_ID: case 
ADF_401XX_PCI_DEVICE_ID: case ADF_402XX_PCI_DEVICE_ID: + case ADF_420XX_PCI_DEVICE_ID: handle->chip_info->mmp_sram_size = 0; handle->chip_info->nn = false; handle->chip_info->lm2lm3 = true; handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG_2X; handle->chip_info->icp_rst_csr = ICP_RESET_CPP0; - handle->chip_info->icp_rst_mask = 0x100015; + if (handle->pci_dev->device == ADF_420XX_PCI_DEVICE_ID) + handle->chip_info->icp_rst_mask = 0x100155; + else + handle->chip_info->icp_rst_mask = 0x100015; handle->chip_info->glb_clk_enable_csr = ICP_GLOBAL_CLK_ENABLE_CPP0; handle->chip_info->misc_ctl_csr = MISC_CONTROL_C4XXX; handle->chip_info->wakeup_event_val = 0x80000000; diff --git a/drivers/crypto/intel/qat/qat_common/qat_uclo.c b/drivers/crypto/intel/qat/qat_common/qat_uclo.c index e27ea7e28c..ad2c64af74 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_uclo.c +++ b/drivers/crypto/intel/qat/qat_common/qat_uclo.c @@ -733,6 +733,7 @@ qat_uclo_get_dev_type(struct icp_qat_fw_loader_handle *handle) case ADF_4XXX_PCI_DEVICE_ID: case ADF_401XX_PCI_DEVICE_ID: case ADF_402XX_PCI_DEVICE_ID: + case ADF_420XX_PCI_DEVICE_ID: return ICP_QAT_AC_4XXX_A_DEV_TYPE; default: pr_err("QAT: unsupported device 0x%x\n",
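
A note on the snapshotting scheme in adf_telemetry.c above: tl_work_handler() relies on the reg_tl_msg_cnt field at the end of the layout, which the device advances as it rewrites the DMA region (an unchanged count is treated as no new data). The handler copies the region, re-reads the counter, and copies once more if it changed. Below is a simplified userspace model of that pattern; take_consistent_snapshot() and its volatile counter are illustrative stand-ins, not driver API.

#include <stdint.h>
#include <string.h>

/*
 * Copy a region that a producer may rewrite concurrently. The producer
 * bumps *msg_cnt after every rewrite of src, so a counter change across
 * the copy means the copy may be torn and is redone once.
 */
static uint32_t take_consistent_snapshot(void *dst, const volatile void *src,
					 size_t sz,
					 const volatile uint32_t *msg_cnt)
{
	uint32_t before = *msg_cnt;

	memcpy(dst, (const void *)src, sz);
	if (*msg_cnt != before)		/* producer wrote mid-copy */
		memcpy(dst, (const void *)src, sz);

	return *msg_cnt;		/* caller records the count it saw */
}

A single retry suffices in the driver's timing model: the worker runs every ADF_TL_TIMER_INT_MS, half the device's 1000 ms write interval, so a second rewrite landing during the retry is not expected.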
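The avg_array() macro in adf_tl_debugfs.c avoids summing u64 samples directly, since the sum could overflow. Instead it splits each element into a quotient and remainder by the sample count, sums the quotients, and carries accumulated remainders into the result whenever they reach a full divisor. A self-contained sketch of the same technique in plain C (illustrative names, not the kernel macro itself):

#include <stdint.h>
#include <stdio.h>

static uint64_t avg_no_overflow(const uint64_t *vals, size_t len)
{
	uint64_t avg = 0;	/* sum of per-element quotients */
	uint64_t rem = 0;	/* sum of per-element remainders, kept < len */
	size_t i;

	for (i = 0; i < len; i++) {
		avg += vals[i] / len;
		rem += vals[i] % len;
		if (rem >= len) {	/* carry a whole unit of the divisor */
			avg++;
			rem -= len;
		}
	}

	return avg;	/* floor(sum(vals) / len), with no wide intermediate */
}

int main(void)
{
	uint64_t v[] = { UINT64_MAX, UINT64_MAX, 1 };

	/* A naive (v[0] + v[1] + v[2]) / 3 would wrap around. */
	printf("%llu\n", (unsigned long long)avg_no_overflow(v, 3));
	return 0;
}

The kernel version keeps the same carry discipline inside a type-generic macro built on do_div().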
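Counter addressing also deserves a note: the RP counter tables are defined against ring pair slot 0 (via ADF_TL_RP_REG_OFF), and tl_print_rp_data() rebases each counter by rp_reg_sz times the slot index before tl_collect_values_u32()/u64() turn the byte offset into an index into the u32-addressed snapshot. A hypothetical helper spelling out that arithmetic:

#include <stddef.h>
#include <stdint.h>

/*
 * Read one 32-bit ring-pair counter out of a snapshot buffer.
 * base_off: byte offset of the counter within ring-pair block 0;
 * block_sz: size of one ring-pair register block; slot: selected slot.
 */
static uint32_t read_rp_counter(const uint32_t *snapshot, size_t base_off,
				size_t block_sz, unsigned int slot)
{
	size_t off = base_off + block_sz * slot;	/* rebased byte offset */

	return snapshot[off / sizeof(uint32_t)];	/* u32-indexed layout */
}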
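Taken together, the debugfs protocol is: write 0 to the control file to stop telemetry, or a value from 1 to num_hbuff to (re)start it with that many history buffers; device_data and the rp_<x>_data files then report current values, plus min/max/avg when the state is greater than 1. The sketch below drives this from userspace; the debugfs directory name is an assumed example, not taken from the patch.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path; the actual name depends on the device and BDF. */
	const char *dir = "/sys/kernel/debug/qat_4xxx_0000:6b:00.0/telemetry";
	char path[256], buf[4096];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path), "%s/control", dir);
	fd = open(path, O_WRONLY);
	if (fd < 0 || write(fd, "1", 1) != 1)	/* start, 1 history buffer */
		return 1;
	close(fd);

	sleep(3);	/* let a few 1000 ms device samples accumulate */

	snprintf(path, sizeof(path), "%s/device_data", dir);
	fd = open(path, O_RDONLY);
	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);

	return 0;
}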