diff options
Diffstat (limited to 'drivers/crypto/marvell')
45 files changed, 20665 insertions, 0 deletions
diff --git a/drivers/crypto/marvell/Kconfig b/drivers/crypto/marvell/Kconfig new file mode 100644 index 0000000000..a48591af12 --- /dev/null +++ b/drivers/crypto/marvell/Kconfig @@ -0,0 +1,53 @@ +# +# Marvell crypto drivers configuration +# + +config CRYPTO_DEV_MARVELL + tristate + +config CRYPTO_DEV_MARVELL_CESA + tristate "Marvell's Cryptographic Engine driver" + depends on PLAT_ORION || ARCH_MVEBU + select CRYPTO_LIB_AES + select CRYPTO_LIB_DES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select SRAM + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Cryptographic Engines and + Security Accelerator (CESA) which can be found on MVEBU and ORION + platforms. + This driver supports CPU offload through DMA transfers. + +config CRYPTO_DEV_OCTEONTX_CPT + tristate "Support for Marvell OcteonTX CPT driver" + depends on ARCH_THUNDER || COMPILE_TEST + depends on PCI_MSI && 64BIT + depends on CRYPTO_LIB_AES + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select CRYPTO_AEAD + select CRYPTO_DEV_MARVELL + help + This driver allows you to utilize the Marvell Cryptographic + Accelerator Unit(CPT) found in OcteonTX series of processors. + + To compile this driver as module, choose M here: + the modules will be called octeontx-cpt and octeontx-cptvf + +config CRYPTO_DEV_OCTEONTX2_CPT + tristate "Marvell OcteonTX2 CPT driver" + depends on ARCH_THUNDER2 || COMPILE_TEST + depends on PCI_MSI && 64BIT + depends on CRYPTO_LIB_AES + depends on NET_VENDOR_MARVELL + select OCTEONTX2_MBOX + select CRYPTO_DEV_MARVELL + select CRYPTO_SKCIPHER + select CRYPTO_HASH + select CRYPTO_AEAD + select NET_DEVLINK + help + This driver allows you to utilize the Marvell Cryptographic + Accelerator Unit(CPT) found in OcteonTX2 series of processors. diff --git a/drivers/crypto/marvell/Makefile b/drivers/crypto/marvell/Makefile new file mode 100644 index 0000000000..39db6d9c0a --- /dev/null +++ b/drivers/crypto/marvell/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += cesa/ +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx/ +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2/ diff --git a/drivers/crypto/marvell/cesa/Makefile b/drivers/crypto/marvell/cesa/Makefile new file mode 100644 index 0000000000..b27cab65e6 --- /dev/null +++ b/drivers/crypto/marvell/cesa/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_MARVELL_CESA) += marvell-cesa.o +marvell-cesa-objs := cesa.o cipher.o hash.o tdma.o diff --git a/drivers/crypto/marvell/cesa/cesa.c b/drivers/crypto/marvell/cesa/cesa.c new file mode 100644 index 0000000000..b61e35b932 --- /dev/null +++ b/drivers/crypto/marvell/cesa/cesa.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Support for Marvell's Cryptographic Engine and Security Accelerator (CESA) + * that can be found on the following platform: Orion, Kirkwood, Armada. This + * driver supports the TDMA engine on platforms on which it is available. + * + * Author: Boris Brezillon <boris.brezillon@free-electrons.com> + * Author: Arnaud Ebalard <arno@natisbad.org> + * + * This work is based on an initial version written by + * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc > + */ + +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/genalloc.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/kthread.h> +#include <linux/mbus.h> +#include <linux/platform_device.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/clk.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/of_irq.h> + +#include "cesa.h" + +/* Limit of the crypto queue before reaching the backlog */ +#define CESA_CRYPTO_DEFAULT_MAX_QLEN 128 + +struct mv_cesa_dev *cesa_dev; + +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog) +{ + struct crypto_async_request *req; + + *backlog = crypto_get_backlog(&engine->queue); + req = crypto_dequeue_request(&engine->queue); + + if (!req) + return NULL; + + return req; +} + +static void mv_cesa_rearm_engine(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req = NULL, *backlog = NULL; + struct mv_cesa_ctx *ctx; + + + spin_lock_bh(&engine->lock); + if (!engine->req) { + req = mv_cesa_dequeue_req_locked(engine, &backlog); + engine->req = req; + } + spin_unlock_bh(&engine->lock); + + if (!req) + return; + + if (backlog) + crypto_request_complete(backlog, -EINPROGRESS); + + ctx = crypto_tfm_ctx(req->tfm); + ctx->ops->step(req); +} + +static int mv_cesa_std_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req; + struct mv_cesa_ctx *ctx; + int res; + + req = engine->req; + ctx = crypto_tfm_ctx(req->tfm); + res = ctx->ops->process(req, status); + + if (res == 0) { + ctx->ops->complete(req); + mv_cesa_engine_enqueue_complete_request(engine, req); + } else if (res == -EINPROGRESS) { + ctx->ops->step(req); + } + + return res; +} + +static int mv_cesa_int_process(struct mv_cesa_engine *engine, u32 status) +{ + if (engine->chain.first && engine->chain.last) + return mv_cesa_tdma_process(engine, status); + + return mv_cesa_std_process(engine, status); +} + +static inline void +mv_cesa_complete_req(struct mv_cesa_ctx *ctx, struct crypto_async_request *req, + int res) +{ + ctx->ops->cleanup(req); + local_bh_disable(); + crypto_request_complete(req, res); + local_bh_enable(); +} + +static irqreturn_t mv_cesa_int(int irq, void *priv) +{ + struct mv_cesa_engine *engine = priv; + struct crypto_async_request *req; + struct mv_cesa_ctx *ctx; + u32 status, mask; + irqreturn_t ret = IRQ_NONE; + + while (true) { + int res; + + mask = mv_cesa_get_int_mask(engine); + status = readl(engine->regs + CESA_SA_INT_STATUS); + + if (!(status & mask)) + break; + + /* + * TODO: avoid clearing the FPGA_INT_STATUS if this not + * relevant on some platforms. + */ + writel(~status, engine->regs + CESA_SA_FPGA_INT_STATUS); + writel(~status, engine->regs + CESA_SA_INT_STATUS); + + /* Process fetched requests */ + res = mv_cesa_int_process(engine, status & mask); + ret = IRQ_HANDLED; + + spin_lock_bh(&engine->lock); + req = engine->req; + if (res != -EINPROGRESS) + engine->req = NULL; + spin_unlock_bh(&engine->lock); + + ctx = crypto_tfm_ctx(req->tfm); + + if (res && res != -EINPROGRESS) + mv_cesa_complete_req(ctx, req, res); + + /* Launch the next pending request */ + mv_cesa_rearm_engine(engine); + + /* Iterate over the complete queue */ + while (true) { + req = mv_cesa_engine_dequeue_complete_request(engine); + if (!req) + break; + + ctx = crypto_tfm_ctx(req->tfm); + mv_cesa_complete_req(ctx, req, 0); + } + } + + return ret; +} + +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq) +{ + int ret; + struct mv_cesa_engine *engine = creq->engine; + + spin_lock_bh(&engine->lock); + ret = crypto_enqueue_request(&engine->queue, req); + if ((mv_cesa_req_get_type(creq) == CESA_DMA_REQ) && + (ret == -EINPROGRESS || ret == -EBUSY)) + mv_cesa_tdma_chain(engine, creq); + spin_unlock_bh(&engine->lock); + + if (ret != -EINPROGRESS) + return ret; + + mv_cesa_rearm_engine(engine); + + return -EINPROGRESS; +} + +static int mv_cesa_add_algs(struct mv_cesa_dev *cesa) +{ + int ret; + int i, j; + + for (i = 0; i < cesa->caps->ncipher_algs; i++) { + ret = crypto_register_skcipher(cesa->caps->cipher_algs[i]); + if (ret) + goto err_unregister_crypto; + } + + for (i = 0; i < cesa->caps->nahash_algs; i++) { + ret = crypto_register_ahash(cesa->caps->ahash_algs[i]); + if (ret) + goto err_unregister_ahash; + } + + return 0; + +err_unregister_ahash: + for (j = 0; j < i; j++) + crypto_unregister_ahash(cesa->caps->ahash_algs[j]); + i = cesa->caps->ncipher_algs; + +err_unregister_crypto: + for (j = 0; j < i; j++) + crypto_unregister_skcipher(cesa->caps->cipher_algs[j]); + + return ret; +} + +static void mv_cesa_remove_algs(struct mv_cesa_dev *cesa) +{ + int i; + + for (i = 0; i < cesa->caps->nahash_algs; i++) + crypto_unregister_ahash(cesa->caps->ahash_algs[i]); + + for (i = 0; i < cesa->caps->ncipher_algs; i++) + crypto_unregister_skcipher(cesa->caps->cipher_algs[i]); +} + +static struct skcipher_alg *orion_cipher_algs[] = { + &mv_cesa_ecb_des_alg, + &mv_cesa_cbc_des_alg, + &mv_cesa_ecb_des3_ede_alg, + &mv_cesa_cbc_des3_ede_alg, + &mv_cesa_ecb_aes_alg, + &mv_cesa_cbc_aes_alg, +}; + +static struct ahash_alg *orion_ahash_algs[] = { + &mv_md5_alg, + &mv_sha1_alg, + &mv_ahmac_md5_alg, + &mv_ahmac_sha1_alg, +}; + +static struct skcipher_alg *armada_370_cipher_algs[] = { + &mv_cesa_ecb_des_alg, + &mv_cesa_cbc_des_alg, + &mv_cesa_ecb_des3_ede_alg, + &mv_cesa_cbc_des3_ede_alg, + &mv_cesa_ecb_aes_alg, + &mv_cesa_cbc_aes_alg, +}; + +static struct ahash_alg *armada_370_ahash_algs[] = { + &mv_md5_alg, + &mv_sha1_alg, + &mv_sha256_alg, + &mv_ahmac_md5_alg, + &mv_ahmac_sha1_alg, + &mv_ahmac_sha256_alg, +}; + +static const struct mv_cesa_caps orion_caps = { + .nengines = 1, + .cipher_algs = orion_cipher_algs, + .ncipher_algs = ARRAY_SIZE(orion_cipher_algs), + .ahash_algs = orion_ahash_algs, + .nahash_algs = ARRAY_SIZE(orion_ahash_algs), + .has_tdma = false, +}; + +static const struct mv_cesa_caps kirkwood_caps = { + .nengines = 1, + .cipher_algs = orion_cipher_algs, + .ncipher_algs = ARRAY_SIZE(orion_cipher_algs), + .ahash_algs = orion_ahash_algs, + .nahash_algs = ARRAY_SIZE(orion_ahash_algs), + .has_tdma = true, +}; + +static const struct mv_cesa_caps armada_370_caps = { + .nengines = 1, + .cipher_algs = armada_370_cipher_algs, + .ncipher_algs = ARRAY_SIZE(armada_370_cipher_algs), + .ahash_algs = armada_370_ahash_algs, + .nahash_algs = ARRAY_SIZE(armada_370_ahash_algs), + .has_tdma = true, +}; + +static const struct mv_cesa_caps armada_xp_caps = { + .nengines = 2, + .cipher_algs = armada_370_cipher_algs, + .ncipher_algs = ARRAY_SIZE(armada_370_cipher_algs), + .ahash_algs = armada_370_ahash_algs, + .nahash_algs = ARRAY_SIZE(armada_370_ahash_algs), + .has_tdma = true, +}; + +static const struct of_device_id mv_cesa_of_match_table[] = { + { .compatible = "marvell,orion-crypto", .data = &orion_caps }, + { .compatible = "marvell,kirkwood-crypto", .data = &kirkwood_caps }, + { .compatible = "marvell,dove-crypto", .data = &kirkwood_caps }, + { .compatible = "marvell,armada-370-crypto", .data = &armada_370_caps }, + { .compatible = "marvell,armada-xp-crypto", .data = &armada_xp_caps }, + { .compatible = "marvell,armada-375-crypto", .data = &armada_xp_caps }, + { .compatible = "marvell,armada-38x-crypto", .data = &armada_xp_caps }, + {} +}; +MODULE_DEVICE_TABLE(of, mv_cesa_of_match_table); + +static void +mv_cesa_conf_mbus_windows(struct mv_cesa_engine *engine, + const struct mbus_dram_target_info *dram) +{ + void __iomem *iobase = engine->regs; + int i; + + for (i = 0; i < 4; i++) { + writel(0, iobase + CESA_TDMA_WINDOW_CTRL(i)); + writel(0, iobase + CESA_TDMA_WINDOW_BASE(i)); + } + + for (i = 0; i < dram->num_cs; i++) { + const struct mbus_dram_window *cs = dram->cs + i; + + writel(((cs->size - 1) & 0xffff0000) | + (cs->mbus_attr << 8) | + (dram->mbus_dram_target_id << 4) | 1, + iobase + CESA_TDMA_WINDOW_CTRL(i)); + writel(cs->base, iobase + CESA_TDMA_WINDOW_BASE(i)); + } +} + +static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) +{ + struct device *dev = cesa->dev; + struct mv_cesa_dev_dma *dma; + + if (!cesa->caps->has_tdma) + return 0; + + dma = devm_kzalloc(dev, sizeof(*dma), GFP_KERNEL); + if (!dma) + return -ENOMEM; + + dma->tdma_desc_pool = dmam_pool_create("tdma_desc", dev, + sizeof(struct mv_cesa_tdma_desc), + 16, 0); + if (!dma->tdma_desc_pool) + return -ENOMEM; + + dma->op_pool = dmam_pool_create("cesa_op", dev, + sizeof(struct mv_cesa_op_ctx), 16, 0); + if (!dma->op_pool) + return -ENOMEM; + + dma->cache_pool = dmam_pool_create("cesa_cache", dev, + CESA_MAX_HASH_BLOCK_SIZE, 1, 0); + if (!dma->cache_pool) + return -ENOMEM; + + dma->padding_pool = dmam_pool_create("cesa_padding", dev, 72, 1, 0); + if (!dma->padding_pool) + return -ENOMEM; + + cesa->dma = dma; + + return 0; +} + +static int mv_cesa_get_sram(struct platform_device *pdev, int idx) +{ + struct mv_cesa_dev *cesa = platform_get_drvdata(pdev); + struct mv_cesa_engine *engine = &cesa->engines[idx]; + const char *res_name = "sram"; + struct resource *res; + + engine->pool = of_gen_pool_get(cesa->dev->of_node, + "marvell,crypto-srams", idx); + if (engine->pool) { + engine->sram_pool = gen_pool_dma_alloc(engine->pool, + cesa->sram_size, + &engine->sram_dma); + if (engine->sram_pool) + return 0; + + engine->pool = NULL; + return -ENOMEM; + } + + if (cesa->caps->nengines > 1) { + if (!idx) + res_name = "sram0"; + else + res_name = "sram1"; + } + + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, + res_name); + if (!res || resource_size(res) < cesa->sram_size) + return -EINVAL; + + engine->sram = devm_ioremap_resource(cesa->dev, res); + if (IS_ERR(engine->sram)) + return PTR_ERR(engine->sram); + + engine->sram_dma = dma_map_resource(cesa->dev, res->start, + cesa->sram_size, + DMA_BIDIRECTIONAL, 0); + if (dma_mapping_error(cesa->dev, engine->sram_dma)) + return -ENOMEM; + + return 0; +} + +static void mv_cesa_put_sram(struct platform_device *pdev, int idx) +{ + struct mv_cesa_dev *cesa = platform_get_drvdata(pdev); + struct mv_cesa_engine *engine = &cesa->engines[idx]; + + if (engine->pool) + gen_pool_free(engine->pool, (unsigned long)engine->sram_pool, + cesa->sram_size); + else + dma_unmap_resource(cesa->dev, engine->sram_dma, + cesa->sram_size, DMA_BIDIRECTIONAL, 0); +} + +static int mv_cesa_probe(struct platform_device *pdev) +{ + const struct mv_cesa_caps *caps = &orion_caps; + const struct mbus_dram_target_info *dram; + const struct of_device_id *match; + struct device *dev = &pdev->dev; + struct mv_cesa_dev *cesa; + struct mv_cesa_engine *engines; + int irq, ret, i, cpu; + u32 sram_size; + + if (cesa_dev) { + dev_err(&pdev->dev, "Only one CESA device authorized\n"); + return -EEXIST; + } + + if (dev->of_node) { + match = of_match_node(mv_cesa_of_match_table, dev->of_node); + if (!match || !match->data) + return -ENOTSUPP; + + caps = match->data; + } + + cesa = devm_kzalloc(dev, sizeof(*cesa), GFP_KERNEL); + if (!cesa) + return -ENOMEM; + + cesa->caps = caps; + cesa->dev = dev; + + sram_size = CESA_SA_DEFAULT_SRAM_SIZE; + of_property_read_u32(cesa->dev->of_node, "marvell,crypto-sram-size", + &sram_size); + if (sram_size < CESA_SA_MIN_SRAM_SIZE) + sram_size = CESA_SA_MIN_SRAM_SIZE; + + cesa->sram_size = sram_size; + cesa->engines = devm_kcalloc(dev, caps->nengines, sizeof(*engines), + GFP_KERNEL); + if (!cesa->engines) + return -ENOMEM; + + spin_lock_init(&cesa->lock); + + cesa->regs = devm_platform_ioremap_resource_byname(pdev, "regs"); + if (IS_ERR(cesa->regs)) + return PTR_ERR(cesa->regs); + + ret = mv_cesa_dev_dma_init(cesa); + if (ret) + return ret; + + dram = mv_mbus_dram_info_nooverlap(); + + platform_set_drvdata(pdev, cesa); + + for (i = 0; i < caps->nengines; i++) { + struct mv_cesa_engine *engine = &cesa->engines[i]; + char res_name[7]; + + engine->id = i; + spin_lock_init(&engine->lock); + + ret = mv_cesa_get_sram(pdev, i); + if (ret) + goto err_cleanup; + + irq = platform_get_irq(pdev, i); + if (irq < 0) { + ret = irq; + goto err_cleanup; + } + + engine->irq = irq; + + /* + * Not all platforms can gate the CESA clocks: do not complain + * if the clock does not exist. + */ + snprintf(res_name, sizeof(res_name), "cesa%d", i); + engine->clk = devm_clk_get(dev, res_name); + if (IS_ERR(engine->clk)) { + engine->clk = devm_clk_get(dev, NULL); + if (IS_ERR(engine->clk)) + engine->clk = NULL; + } + + snprintf(res_name, sizeof(res_name), "cesaz%d", i); + engine->zclk = devm_clk_get(dev, res_name); + if (IS_ERR(engine->zclk)) + engine->zclk = NULL; + + ret = clk_prepare_enable(engine->clk); + if (ret) + goto err_cleanup; + + ret = clk_prepare_enable(engine->zclk); + if (ret) + goto err_cleanup; + + engine->regs = cesa->regs + CESA_ENGINE_OFF(i); + + if (dram && cesa->caps->has_tdma) + mv_cesa_conf_mbus_windows(engine, dram); + + writel(0, engine->regs + CESA_SA_INT_STATUS); + writel(CESA_SA_CFG_STOP_DIG_ERR, + engine->regs + CESA_SA_CFG); + writel(engine->sram_dma & CESA_SA_SRAM_MSK, + engine->regs + CESA_SA_DESC_P0); + + ret = devm_request_threaded_irq(dev, irq, NULL, mv_cesa_int, + IRQF_ONESHOT, + dev_name(&pdev->dev), + engine); + if (ret) + goto err_cleanup; + + /* Set affinity */ + cpu = cpumask_local_spread(engine->id, NUMA_NO_NODE); + irq_set_affinity_hint(irq, get_cpu_mask(cpu)); + + crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); + atomic_set(&engine->load, 0); + INIT_LIST_HEAD(&engine->complete_queue); + } + + cesa_dev = cesa; + + ret = mv_cesa_add_algs(cesa); + if (ret) { + cesa_dev = NULL; + goto err_cleanup; + } + + dev_info(dev, "CESA device successfully registered\n"); + + return 0; + +err_cleanup: + for (i = 0; i < caps->nengines; i++) { + clk_disable_unprepare(cesa->engines[i].zclk); + clk_disable_unprepare(cesa->engines[i].clk); + mv_cesa_put_sram(pdev, i); + if (cesa->engines[i].irq > 0) + irq_set_affinity_hint(cesa->engines[i].irq, NULL); + } + + return ret; +} + +static int mv_cesa_remove(struct platform_device *pdev) +{ + struct mv_cesa_dev *cesa = platform_get_drvdata(pdev); + int i; + + mv_cesa_remove_algs(cesa); + + for (i = 0; i < cesa->caps->nengines; i++) { + clk_disable_unprepare(cesa->engines[i].zclk); + clk_disable_unprepare(cesa->engines[i].clk); + mv_cesa_put_sram(pdev, i); + irq_set_affinity_hint(cesa->engines[i].irq, NULL); + } + + return 0; +} + +static const struct platform_device_id mv_cesa_plat_id_table[] = { + { .name = "mv_crypto" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(platform, mv_cesa_plat_id_table); + +static struct platform_driver marvell_cesa = { + .probe = mv_cesa_probe, + .remove = mv_cesa_remove, + .id_table = mv_cesa_plat_id_table, + .driver = { + .name = "marvell-cesa", + .of_match_table = mv_cesa_of_match_table, + }, +}; +module_platform_driver(marvell_cesa); + +MODULE_AUTHOR("Boris Brezillon <boris.brezillon@free-electrons.com>"); +MODULE_AUTHOR("Arnaud Ebalard <arno@natisbad.org>"); +MODULE_DESCRIPTION("Support for Marvell's cryptographic engine"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h new file mode 100644 index 0000000000..d215a6bed6 --- /dev/null +++ b/drivers/crypto/marvell/cesa/cesa.h @@ -0,0 +1,915 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __MARVELL_CESA_H__ +#define __MARVELL_CESA_H__ + +#include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> + +#include <linux/dma-direction.h> +#include <linux/dmapool.h> + +#define CESA_ENGINE_OFF(i) (((i) * 0x2000)) + +#define CESA_TDMA_BYTE_CNT 0x800 +#define CESA_TDMA_SRC_ADDR 0x810 +#define CESA_TDMA_DST_ADDR 0x820 +#define CESA_TDMA_NEXT_ADDR 0x830 + +#define CESA_TDMA_CONTROL 0x840 +#define CESA_TDMA_DST_BURST GENMASK(2, 0) +#define CESA_TDMA_DST_BURST_32B 3 +#define CESA_TDMA_DST_BURST_128B 4 +#define CESA_TDMA_OUT_RD_EN BIT(4) +#define CESA_TDMA_SRC_BURST GENMASK(8, 6) +#define CESA_TDMA_SRC_BURST_32B (3 << 6) +#define CESA_TDMA_SRC_BURST_128B (4 << 6) +#define CESA_TDMA_CHAIN BIT(9) +#define CESA_TDMA_BYTE_SWAP BIT(11) +#define CESA_TDMA_NO_BYTE_SWAP BIT(11) +#define CESA_TDMA_EN BIT(12) +#define CESA_TDMA_FETCH_ND BIT(13) +#define CESA_TDMA_ACT BIT(14) + +#define CESA_TDMA_CUR 0x870 +#define CESA_TDMA_ERROR_CAUSE 0x8c8 +#define CESA_TDMA_ERROR_MSK 0x8cc + +#define CESA_TDMA_WINDOW_BASE(x) (((x) * 0x8) + 0xa00) +#define CESA_TDMA_WINDOW_CTRL(x) (((x) * 0x8) + 0xa04) + +#define CESA_IVDIG(x) (0xdd00 + ((x) * 4) + \ + (((x) < 5) ? 0 : 0x14)) + +#define CESA_SA_CMD 0xde00 +#define CESA_SA_CMD_EN_CESA_SA_ACCL0 BIT(0) +#define CESA_SA_CMD_EN_CESA_SA_ACCL1 BIT(1) +#define CESA_SA_CMD_DISABLE_SEC BIT(2) + +#define CESA_SA_DESC_P0 0xde04 + +#define CESA_SA_DESC_P1 0xde14 + +#define CESA_SA_CFG 0xde08 +#define CESA_SA_CFG_STOP_DIG_ERR GENMASK(1, 0) +#define CESA_SA_CFG_DIG_ERR_CONT 0 +#define CESA_SA_CFG_DIG_ERR_SKIP 1 +#define CESA_SA_CFG_DIG_ERR_STOP 3 +#define CESA_SA_CFG_CH0_W_IDMA BIT(7) +#define CESA_SA_CFG_CH1_W_IDMA BIT(8) +#define CESA_SA_CFG_ACT_CH0_IDMA BIT(9) +#define CESA_SA_CFG_ACT_CH1_IDMA BIT(10) +#define CESA_SA_CFG_MULTI_PKT BIT(11) +#define CESA_SA_CFG_PARA_DIS BIT(13) + +#define CESA_SA_ACCEL_STATUS 0xde0c +#define CESA_SA_ST_ACT_0 BIT(0) +#define CESA_SA_ST_ACT_1 BIT(1) + +/* + * CESA_SA_FPGA_INT_STATUS looks like an FPGA leftover and is documented only + * in Errata 4.12. It looks like that it was part of an IRQ-controller in FPGA + * and someone forgot to remove it while switching to the core and moving to + * CESA_SA_INT_STATUS. + */ +#define CESA_SA_FPGA_INT_STATUS 0xdd68 +#define CESA_SA_INT_STATUS 0xde20 +#define CESA_SA_INT_AUTH_DONE BIT(0) +#define CESA_SA_INT_DES_E_DONE BIT(1) +#define CESA_SA_INT_AES_E_DONE BIT(2) +#define CESA_SA_INT_AES_D_DONE BIT(3) +#define CESA_SA_INT_ENC_DONE BIT(4) +#define CESA_SA_INT_ACCEL0_DONE BIT(5) +#define CESA_SA_INT_ACCEL1_DONE BIT(6) +#define CESA_SA_INT_ACC0_IDMA_DONE BIT(7) +#define CESA_SA_INT_ACC1_IDMA_DONE BIT(8) +#define CESA_SA_INT_IDMA_DONE BIT(9) +#define CESA_SA_INT_IDMA_OWN_ERR BIT(10) + +#define CESA_SA_INT_MSK 0xde24 + +#define CESA_SA_DESC_CFG_OP_MAC_ONLY 0 +#define CESA_SA_DESC_CFG_OP_CRYPT_ONLY 1 +#define CESA_SA_DESC_CFG_OP_MAC_CRYPT 2 +#define CESA_SA_DESC_CFG_OP_CRYPT_MAC 3 +#define CESA_SA_DESC_CFG_OP_MSK GENMASK(1, 0) +#define CESA_SA_DESC_CFG_MACM_SHA256 (1 << 4) +#define CESA_SA_DESC_CFG_MACM_HMAC_SHA256 (3 << 4) +#define CESA_SA_DESC_CFG_MACM_MD5 (4 << 4) +#define CESA_SA_DESC_CFG_MACM_SHA1 (5 << 4) +#define CESA_SA_DESC_CFG_MACM_HMAC_MD5 (6 << 4) +#define CESA_SA_DESC_CFG_MACM_HMAC_SHA1 (7 << 4) +#define CESA_SA_DESC_CFG_MACM_MSK GENMASK(6, 4) +#define CESA_SA_DESC_CFG_CRYPTM_DES (1 << 8) +#define CESA_SA_DESC_CFG_CRYPTM_3DES (2 << 8) +#define CESA_SA_DESC_CFG_CRYPTM_AES (3 << 8) +#define CESA_SA_DESC_CFG_CRYPTM_MSK GENMASK(9, 8) +#define CESA_SA_DESC_CFG_DIR_ENC (0 << 12) +#define CESA_SA_DESC_CFG_DIR_DEC (1 << 12) +#define CESA_SA_DESC_CFG_CRYPTCM_ECB (0 << 16) +#define CESA_SA_DESC_CFG_CRYPTCM_CBC (1 << 16) +#define CESA_SA_DESC_CFG_CRYPTCM_MSK BIT(16) +#define CESA_SA_DESC_CFG_3DES_EEE (0 << 20) +#define CESA_SA_DESC_CFG_3DES_EDE (1 << 20) +#define CESA_SA_DESC_CFG_AES_LEN_128 (0 << 24) +#define CESA_SA_DESC_CFG_AES_LEN_192 (1 << 24) +#define CESA_SA_DESC_CFG_AES_LEN_256 (2 << 24) +#define CESA_SA_DESC_CFG_AES_LEN_MSK GENMASK(25, 24) +#define CESA_SA_DESC_CFG_NOT_FRAG (0 << 30) +#define CESA_SA_DESC_CFG_FIRST_FRAG (1 << 30) +#define CESA_SA_DESC_CFG_LAST_FRAG (2 << 30) +#define CESA_SA_DESC_CFG_MID_FRAG (3 << 30) +#define CESA_SA_DESC_CFG_FRAG_MSK GENMASK(31, 30) + +/* + * /-----------\ 0 + * | ACCEL CFG | 4 * 8 + * |-----------| 0x20 + * | CRYPT KEY | 8 * 4 + * |-----------| 0x40 + * | IV IN | 4 * 4 + * |-----------| 0x40 (inplace) + * | IV BUF | 4 * 4 + * |-----------| 0x80 + * | DATA IN | 16 * x (max ->max_req_size) + * |-----------| 0x80 (inplace operation) + * | DATA OUT | 16 * x (max ->max_req_size) + * \-----------/ SRAM size + */ + +/* + * Hashing memory map: + * /-----------\ 0 + * | ACCEL CFG | 4 * 8 + * |-----------| 0x20 + * | Inner IV | 8 * 4 + * |-----------| 0x40 + * | Outer IV | 8 * 4 + * |-----------| 0x60 + * | Output BUF| 8 * 4 + * |-----------| 0x80 + * | DATA IN | 64 * x (max ->max_req_size) + * \-----------/ SRAM size + */ + +#define CESA_SA_CFG_SRAM_OFFSET 0x00 +#define CESA_SA_DATA_SRAM_OFFSET 0x80 + +#define CESA_SA_CRYPT_KEY_SRAM_OFFSET 0x20 +#define CESA_SA_CRYPT_IV_SRAM_OFFSET 0x40 + +#define CESA_SA_MAC_IIV_SRAM_OFFSET 0x20 +#define CESA_SA_MAC_OIV_SRAM_OFFSET 0x40 +#define CESA_SA_MAC_DIG_SRAM_OFFSET 0x60 + +#define CESA_SA_DESC_CRYPT_DATA(offset) \ + cpu_to_le32((CESA_SA_DATA_SRAM_OFFSET + (offset)) | \ + ((CESA_SA_DATA_SRAM_OFFSET + (offset)) << 16)) + +#define CESA_SA_DESC_CRYPT_IV(offset) \ + cpu_to_le32((CESA_SA_CRYPT_IV_SRAM_OFFSET + (offset)) | \ + ((CESA_SA_CRYPT_IV_SRAM_OFFSET + (offset)) << 16)) + +#define CESA_SA_DESC_CRYPT_KEY(offset) \ + cpu_to_le32(CESA_SA_CRYPT_KEY_SRAM_OFFSET + (offset)) + +#define CESA_SA_DESC_MAC_DATA(offset) \ + cpu_to_le32(CESA_SA_DATA_SRAM_OFFSET + (offset)) +#define CESA_SA_DESC_MAC_DATA_MSK cpu_to_le32(GENMASK(15, 0)) + +#define CESA_SA_DESC_MAC_TOTAL_LEN(total_len) cpu_to_le32((total_len) << 16) +#define CESA_SA_DESC_MAC_TOTAL_LEN_MSK cpu_to_le32(GENMASK(31, 16)) + +#define CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX 0xffff + +#define CESA_SA_DESC_MAC_DIGEST(offset) \ + cpu_to_le32(CESA_SA_MAC_DIG_SRAM_OFFSET + (offset)) +#define CESA_SA_DESC_MAC_DIGEST_MSK cpu_to_le32(GENMASK(15, 0)) + +#define CESA_SA_DESC_MAC_FRAG_LEN(frag_len) cpu_to_le32((frag_len) << 16) +#define CESA_SA_DESC_MAC_FRAG_LEN_MSK cpu_to_le32(GENMASK(31, 16)) + +#define CESA_SA_DESC_MAC_IV(offset) \ + cpu_to_le32((CESA_SA_MAC_IIV_SRAM_OFFSET + (offset)) | \ + ((CESA_SA_MAC_OIV_SRAM_OFFSET + (offset)) << 16)) + +#define CESA_SA_SRAM_SIZE 2048 +#define CESA_SA_SRAM_PAYLOAD_SIZE (cesa_dev->sram_size - \ + CESA_SA_DATA_SRAM_OFFSET) + +#define CESA_SA_DEFAULT_SRAM_SIZE 2048 +#define CESA_SA_MIN_SRAM_SIZE 1024 + +#define CESA_SA_SRAM_MSK (2048 - 1) + +#define CESA_MAX_HASH_BLOCK_SIZE 64 +#define CESA_HASH_BLOCK_SIZE_MSK (CESA_MAX_HASH_BLOCK_SIZE - 1) + +/** + * struct mv_cesa_sec_accel_desc - security accelerator descriptor + * @config: engine config + * @enc_p: input and output data pointers for a cipher operation + * @enc_len: cipher operation length + * @enc_key_p: cipher key pointer + * @enc_iv: cipher IV pointers + * @mac_src_p: input pointer and total hash length + * @mac_digest: digest pointer and hash operation length + * @mac_iv: hmac IV pointers + * + * Structure passed to the CESA engine to describe the crypto operation + * to be executed. + */ +struct mv_cesa_sec_accel_desc { + __le32 config; + __le32 enc_p; + __le32 enc_len; + __le32 enc_key_p; + __le32 enc_iv; + __le32 mac_src_p; + __le32 mac_digest; + __le32 mac_iv; +}; + +/** + * struct mv_cesa_skcipher_op_ctx - cipher operation context + * @key: cipher key + * @iv: cipher IV + * + * Context associated to a cipher operation. + */ +struct mv_cesa_skcipher_op_ctx { + __le32 key[8]; + u32 iv[4]; +}; + +/** + * struct mv_cesa_hash_op_ctx - hash or hmac operation context + * @key: cipher key + * @iv: cipher IV + * + * Context associated to an hash or hmac operation. + */ +struct mv_cesa_hash_op_ctx { + u32 iv[16]; + __le32 hash[8]; +}; + +/** + * struct mv_cesa_op_ctx - crypto operation context + * @desc: CESA descriptor + * @ctx: context associated to the crypto operation + * + * Context associated to a crypto operation. + */ +struct mv_cesa_op_ctx { + struct mv_cesa_sec_accel_desc desc; + union { + struct mv_cesa_skcipher_op_ctx skcipher; + struct mv_cesa_hash_op_ctx hash; + } ctx; +}; + +/* TDMA descriptor flags */ +#define CESA_TDMA_DST_IN_SRAM BIT(31) +#define CESA_TDMA_SRC_IN_SRAM BIT(30) +#define CESA_TDMA_END_OF_REQ BIT(29) +#define CESA_TDMA_BREAK_CHAIN BIT(28) +#define CESA_TDMA_SET_STATE BIT(27) +#define CESA_TDMA_TYPE_MSK GENMASK(26, 0) +#define CESA_TDMA_DUMMY 0 +#define CESA_TDMA_DATA 1 +#define CESA_TDMA_OP 2 +#define CESA_TDMA_RESULT 3 + +/** + * struct mv_cesa_tdma_desc - TDMA descriptor + * @byte_cnt: number of bytes to transfer + * @src: DMA address of the source + * @dst: DMA address of the destination + * @next_dma: DMA address of the next TDMA descriptor + * @cur_dma: DMA address of this TDMA descriptor + * @next: pointer to the next TDMA descriptor + * @op: CESA operation attached to this TDMA descriptor + * @data: raw data attached to this TDMA descriptor + * @flags: flags describing the TDMA transfer. See the + * "TDMA descriptor flags" section above + * + * TDMA descriptor used to create a transfer chain describing a crypto + * operation. + */ +struct mv_cesa_tdma_desc { + __le32 byte_cnt; + union { + __le32 src; + u32 src_dma; + }; + union { + __le32 dst; + u32 dst_dma; + }; + __le32 next_dma; + + /* Software state */ + dma_addr_t cur_dma; + struct mv_cesa_tdma_desc *next; + union { + struct mv_cesa_op_ctx *op; + void *data; + }; + u32 flags; +}; + +/** + * struct mv_cesa_sg_dma_iter - scatter-gather iterator + * @dir: transfer direction + * @sg: scatter list + * @offset: current position in the scatter list + * @op_offset: current position in the crypto operation + * + * Iterator used to iterate over a scatterlist while creating a TDMA chain for + * a crypto operation. + */ +struct mv_cesa_sg_dma_iter { + enum dma_data_direction dir; + struct scatterlist *sg; + unsigned int offset; + unsigned int op_offset; +}; + +/** + * struct mv_cesa_dma_iter - crypto operation iterator + * @len: the crypto operation length + * @offset: current position in the crypto operation + * @op_len: sub-operation length (the crypto engine can only act on 2kb + * chunks) + * + * Iterator used to create a TDMA chain for a given crypto operation. + */ +struct mv_cesa_dma_iter { + unsigned int len; + unsigned int offset; + unsigned int op_len; +}; + +/** + * struct mv_cesa_tdma_chain - TDMA chain + * @first: first entry in the TDMA chain + * @last: last entry in the TDMA chain + * + * Stores a TDMA chain for a specific crypto operation. + */ +struct mv_cesa_tdma_chain { + struct mv_cesa_tdma_desc *first; + struct mv_cesa_tdma_desc *last; +}; + +struct mv_cesa_engine; + +/** + * struct mv_cesa_caps - CESA device capabilities + * @engines: number of engines + * @has_tdma: whether this device has a TDMA block + * @cipher_algs: supported cipher algorithms + * @ncipher_algs: number of supported cipher algorithms + * @ahash_algs: supported hash algorithms + * @nahash_algs: number of supported hash algorithms + * + * Structure used to describe CESA device capabilities. + */ +struct mv_cesa_caps { + int nengines; + bool has_tdma; + struct skcipher_alg **cipher_algs; + int ncipher_algs; + struct ahash_alg **ahash_algs; + int nahash_algs; +}; + +/** + * struct mv_cesa_dev_dma - DMA pools + * @tdma_desc_pool: TDMA desc pool + * @op_pool: crypto operation pool + * @cache_pool: data cache pool (used by hash implementation when the + * hash request is smaller than the hash block size) + * @padding_pool: padding pool (used by hash implementation when hardware + * padding cannot be used) + * + * Structure containing the different DMA pools used by this driver. + */ +struct mv_cesa_dev_dma { + struct dma_pool *tdma_desc_pool; + struct dma_pool *op_pool; + struct dma_pool *cache_pool; + struct dma_pool *padding_pool; +}; + +/** + * struct mv_cesa_dev - CESA device + * @caps: device capabilities + * @regs: device registers + * @sram_size: usable SRAM size + * @lock: device lock + * @engines: array of engines + * @dma: dma pools + * + * Structure storing CESA device information. + */ +struct mv_cesa_dev { + const struct mv_cesa_caps *caps; + void __iomem *regs; + struct device *dev; + unsigned int sram_size; + spinlock_t lock; + struct mv_cesa_engine *engines; + struct mv_cesa_dev_dma *dma; +}; + +/** + * struct mv_cesa_engine - CESA engine + * @id: engine id + * @regs: engine registers + * @sram: SRAM memory region + * @sram_pool: SRAM memory region from pool + * @sram_dma: DMA address of the SRAM memory region + * @lock: engine lock + * @req: current crypto request + * @clk: engine clk + * @zclk: engine zclk + * @max_req_len: maximum chunk length (useful to create the TDMA chain) + * @int_mask: interrupt mask cache + * @pool: memory pool pointing to the memory region reserved in + * SRAM + * @queue: fifo of the pending crypto requests + * @load: engine load counter, useful for load balancing + * @chain: list of the current tdma descriptors being processed + * by this engine. + * @complete_queue: fifo of the processed requests by the engine + * + * Structure storing CESA engine information. + */ +struct mv_cesa_engine { + int id; + void __iomem *regs; + union { + void __iomem *sram; + void *sram_pool; + }; + dma_addr_t sram_dma; + spinlock_t lock; + struct crypto_async_request *req; + struct clk *clk; + struct clk *zclk; + size_t max_req_len; + u32 int_mask; + struct gen_pool *pool; + struct crypto_queue queue; + atomic_t load; + struct mv_cesa_tdma_chain chain; + struct list_head complete_queue; + int irq; +}; + +/** + * struct mv_cesa_req_ops - CESA request operations + * @process: process a request chunk result (should return 0 if the + * operation, -EINPROGRESS if it needs more steps or an error + * code) + * @step: launch the crypto operation on the next chunk + * @cleanup: cleanup the crypto request (release associated data) + * @complete: complete the request, i.e copy result or context from sram when + * needed. + */ +struct mv_cesa_req_ops { + int (*process)(struct crypto_async_request *req, u32 status); + void (*step)(struct crypto_async_request *req); + void (*cleanup)(struct crypto_async_request *req); + void (*complete)(struct crypto_async_request *req); +}; + +/** + * struct mv_cesa_ctx - CESA operation context + * @ops: crypto operations + * + * Base context structure inherited by operation specific ones. + */ +struct mv_cesa_ctx { + const struct mv_cesa_req_ops *ops; +}; + +/** + * struct mv_cesa_hash_ctx - CESA hash operation context + * @base: base context structure + * + * Hash context structure. + */ +struct mv_cesa_hash_ctx { + struct mv_cesa_ctx base; +}; + +/** + * struct mv_cesa_hash_ctx - CESA hmac operation context + * @base: base context structure + * @iv: initialization vectors + * + * HMAC context structure. + */ +struct mv_cesa_hmac_ctx { + struct mv_cesa_ctx base; + __be32 iv[16]; +}; + +/** + * enum mv_cesa_req_type - request type definitions + * @CESA_STD_REQ: standard request + * @CESA_DMA_REQ: DMA request + */ +enum mv_cesa_req_type { + CESA_STD_REQ, + CESA_DMA_REQ, +}; + +/** + * struct mv_cesa_req - CESA request + * @engine: engine associated with this request + * @chain: list of tdma descriptors associated with this request + */ +struct mv_cesa_req { + struct mv_cesa_engine *engine; + struct mv_cesa_tdma_chain chain; +}; + +/** + * struct mv_cesa_sg_std_iter - CESA scatter-gather iterator for standard + * requests + * @iter: sg mapping iterator + * @offset: current offset in the SG entry mapped in memory + */ +struct mv_cesa_sg_std_iter { + struct sg_mapping_iter iter; + unsigned int offset; +}; + +/** + * struct mv_cesa_skcipher_std_req - cipher standard request + * @op: operation context + * @offset: current operation offset + * @size: size of the crypto operation + */ +struct mv_cesa_skcipher_std_req { + struct mv_cesa_op_ctx op; + unsigned int offset; + unsigned int size; + bool skip_ctx; +}; + +/** + * struct mv_cesa_skcipher_req - cipher request + * @req: type specific request information + * @src_nents: number of entries in the src sg list + * @dst_nents: number of entries in the dest sg list + */ +struct mv_cesa_skcipher_req { + struct mv_cesa_req base; + struct mv_cesa_skcipher_std_req std; + int src_nents; + int dst_nents; +}; + +/** + * struct mv_cesa_ahash_std_req - standard hash request + * @offset: current operation offset + */ +struct mv_cesa_ahash_std_req { + unsigned int offset; +}; + +/** + * struct mv_cesa_ahash_dma_req - DMA hash request + * @padding: padding buffer + * @padding_dma: DMA address of the padding buffer + * @cache_dma: DMA address of the cache buffer + */ +struct mv_cesa_ahash_dma_req { + u8 *padding; + dma_addr_t padding_dma; + u8 *cache; + dma_addr_t cache_dma; +}; + +/** + * struct mv_cesa_ahash_req - hash request + * @req: type specific request information + * @cache: cache buffer + * @cache_ptr: write pointer in the cache buffer + * @len: hash total length + * @src_nents: number of entries in the scatterlist + * @last_req: define whether the current operation is the last one + * or not + * @state: hash state + */ +struct mv_cesa_ahash_req { + struct mv_cesa_req base; + union { + struct mv_cesa_ahash_dma_req dma; + struct mv_cesa_ahash_std_req std; + } req; + struct mv_cesa_op_ctx op_tmpl; + u8 cache[CESA_MAX_HASH_BLOCK_SIZE]; + unsigned int cache_ptr; + u64 len; + int src_nents; + bool last_req; + bool algo_le; + u32 state[8]; +}; + +/* CESA functions */ + +extern struct mv_cesa_dev *cesa_dev; + + +static inline void +mv_cesa_engine_enqueue_complete_request(struct mv_cesa_engine *engine, + struct crypto_async_request *req) +{ + list_add_tail(&req->list, &engine->complete_queue); +} + +static inline struct crypto_async_request * +mv_cesa_engine_dequeue_complete_request(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req; + + req = list_first_entry_or_null(&engine->complete_queue, + struct crypto_async_request, + list); + if (req) + list_del(&req->list); + + return req; +} + + +static inline enum mv_cesa_req_type +mv_cesa_req_get_type(struct mv_cesa_req *req) +{ + return req->chain.first ? CESA_DMA_REQ : CESA_STD_REQ; +} + +static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op, + u32 cfg, u32 mask) +{ + op->desc.config &= cpu_to_le32(~mask); + op->desc.config |= cpu_to_le32(cfg); +} + +static inline u32 mv_cesa_get_op_cfg(const struct mv_cesa_op_ctx *op) +{ + return le32_to_cpu(op->desc.config); +} + +static inline void mv_cesa_set_op_cfg(struct mv_cesa_op_ctx *op, u32 cfg) +{ + op->desc.config = cpu_to_le32(cfg); +} + +static inline void mv_cesa_adjust_op(struct mv_cesa_engine *engine, + struct mv_cesa_op_ctx *op) +{ + u32 offset = engine->sram_dma & CESA_SA_SRAM_MSK; + + op->desc.enc_p = CESA_SA_DESC_CRYPT_DATA(offset); + op->desc.enc_key_p = CESA_SA_DESC_CRYPT_KEY(offset); + op->desc.enc_iv = CESA_SA_DESC_CRYPT_IV(offset); + op->desc.mac_src_p &= ~CESA_SA_DESC_MAC_DATA_MSK; + op->desc.mac_src_p |= CESA_SA_DESC_MAC_DATA(offset); + op->desc.mac_digest &= ~CESA_SA_DESC_MAC_DIGEST_MSK; + op->desc.mac_digest |= CESA_SA_DESC_MAC_DIGEST(offset); + op->desc.mac_iv = CESA_SA_DESC_MAC_IV(offset); +} + +static inline void mv_cesa_set_crypt_op_len(struct mv_cesa_op_ctx *op, int len) +{ + op->desc.enc_len = cpu_to_le32(len); +} + +static inline void mv_cesa_set_mac_op_total_len(struct mv_cesa_op_ctx *op, + int len) +{ + op->desc.mac_src_p &= ~CESA_SA_DESC_MAC_TOTAL_LEN_MSK; + op->desc.mac_src_p |= CESA_SA_DESC_MAC_TOTAL_LEN(len); +} + +static inline void mv_cesa_set_mac_op_frag_len(struct mv_cesa_op_ctx *op, + int len) +{ + op->desc.mac_digest &= ~CESA_SA_DESC_MAC_FRAG_LEN_MSK; + op->desc.mac_digest |= CESA_SA_DESC_MAC_FRAG_LEN(len); +} + +static inline void mv_cesa_set_int_mask(struct mv_cesa_engine *engine, + u32 int_mask) +{ + if (int_mask == engine->int_mask) + return; + + writel_relaxed(int_mask, engine->regs + CESA_SA_INT_MSK); + engine->int_mask = int_mask; +} + +static inline u32 mv_cesa_get_int_mask(struct mv_cesa_engine *engine) +{ + return engine->int_mask; +} + +static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) +{ + return (mv_cesa_get_op_cfg(op) & CESA_SA_DESC_CFG_FRAG_MSK) == + CESA_SA_DESC_CFG_FIRST_FRAG; +} + +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq); + +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog); + +static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) +{ + int i; + u32 min_load = U32_MAX; + struct mv_cesa_engine *selected = NULL; + + for (i = 0; i < cesa_dev->caps->nengines; i++) { + struct mv_cesa_engine *engine = cesa_dev->engines + i; + u32 load = atomic_read(&engine->load); + + if (load < min_load) { + min_load = load; + selected = engine; + } + } + + atomic_add(weight, &selected->load); + + return selected; +} + +/* + * Helper function that indicates whether a crypto request needs to be + * cleaned up or not after being enqueued using mv_cesa_queue_req(). + */ +static inline int mv_cesa_req_needs_cleanup(struct crypto_async_request *req, + int ret) +{ + /* + * The queue still had some space, the request was queued + * normally, so there's no need to clean it up. + */ + if (ret == -EINPROGRESS) + return false; + + /* + * The queue had not space left, but since the request is + * flagged with CRYPTO_TFM_REQ_MAY_BACKLOG, it was added to + * the backlog and will be processed later. There's no need to + * clean it up. + */ + if (ret == -EBUSY) + return false; + + /* Request wasn't queued, we need to clean it up */ + return true; +} + +/* TDMA functions */ + +static inline void mv_cesa_req_dma_iter_init(struct mv_cesa_dma_iter *iter, + unsigned int len) +{ + iter->len = len; + iter->op_len = min(len, CESA_SA_SRAM_PAYLOAD_SIZE); + iter->offset = 0; +} + +static inline void mv_cesa_sg_dma_iter_init(struct mv_cesa_sg_dma_iter *iter, + struct scatterlist *sg, + enum dma_data_direction dir) +{ + iter->op_offset = 0; + iter->offset = 0; + iter->sg = sg; + iter->dir = dir; +} + +static inline unsigned int +mv_cesa_req_dma_iter_transfer_len(struct mv_cesa_dma_iter *iter, + struct mv_cesa_sg_dma_iter *sgiter) +{ + return min(iter->op_len - sgiter->op_offset, + sg_dma_len(sgiter->sg) - sgiter->offset); +} + +bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *chain, + struct mv_cesa_sg_dma_iter *sgiter, + unsigned int len); + +static inline bool mv_cesa_req_dma_iter_next_op(struct mv_cesa_dma_iter *iter) +{ + iter->offset += iter->op_len; + iter->op_len = min(iter->len - iter->offset, + CESA_SA_SRAM_PAYLOAD_SIZE); + + return iter->op_len; +} + +void mv_cesa_dma_step(struct mv_cesa_req *dreq); + +static inline int mv_cesa_dma_process(struct mv_cesa_req *dreq, + u32 status) +{ + if (!(status & CESA_SA_INT_ACC0_IDMA_DONE)) + return -EINPROGRESS; + + if (status & CESA_SA_INT_IDMA_OWN_ERR) + return -EINVAL; + + return 0; +} + +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, + struct mv_cesa_engine *engine); +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq); +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq); +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status); + + +static inline void +mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) +{ + memset(chain, 0, sizeof(*chain)); +} + +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags); + +struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, + const struct mv_cesa_op_ctx *op_templ, + bool skip_ctx, + gfp_t flags); + +int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain, + dma_addr_t dst, dma_addr_t src, u32 size, + u32 flags, gfp_t gfp_flags); + +int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain, gfp_t flags); +int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, gfp_t flags); + +int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_dma_iter *dma_iter, + struct mv_cesa_sg_dma_iter *sgiter, + gfp_t gfp_flags); + +size_t mv_cesa_sg_copy(struct mv_cesa_engine *engine, + struct scatterlist *sgl, unsigned int nents, + unsigned int sram_off, size_t buflen, off_t skip, + bool to_sram); + +static inline size_t mv_cesa_sg_copy_to_sram(struct mv_cesa_engine *engine, + struct scatterlist *sgl, + unsigned int nents, + unsigned int sram_off, + size_t buflen, off_t skip) +{ + return mv_cesa_sg_copy(engine, sgl, nents, sram_off, buflen, skip, + true); +} + +static inline size_t mv_cesa_sg_copy_from_sram(struct mv_cesa_engine *engine, + struct scatterlist *sgl, + unsigned int nents, + unsigned int sram_off, + size_t buflen, off_t skip) +{ + return mv_cesa_sg_copy(engine, sgl, nents, sram_off, buflen, skip, + false); +} + +/* Algorithm definitions */ + +extern struct ahash_alg mv_md5_alg; +extern struct ahash_alg mv_sha1_alg; +extern struct ahash_alg mv_sha256_alg; +extern struct ahash_alg mv_ahmac_md5_alg; +extern struct ahash_alg mv_ahmac_sha1_alg; +extern struct ahash_alg mv_ahmac_sha256_alg; + +extern struct skcipher_alg mv_cesa_ecb_des_alg; +extern struct skcipher_alg mv_cesa_cbc_des_alg; +extern struct skcipher_alg mv_cesa_ecb_des3_ede_alg; +extern struct skcipher_alg mv_cesa_cbc_des3_ede_alg; +extern struct skcipher_alg mv_cesa_ecb_aes_alg; +extern struct skcipher_alg mv_cesa_cbc_aes_alg; + +#endif /* __MARVELL_CESA_H__ */ diff --git a/drivers/crypto/marvell/cesa/cipher.c b/drivers/crypto/marvell/cesa/cipher.c new file mode 100644 index 0000000000..0f37dfd42d --- /dev/null +++ b/drivers/crypto/marvell/cesa/cipher.c @@ -0,0 +1,817 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Cipher algorithms supported by the CESA: DES, 3DES and AES. + * + * Author: Boris Brezillon <boris.brezillon@free-electrons.com> + * Author: Arnaud Ebalard <arno@natisbad.org> + * + * This work is based on an initial version written by + * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc > + */ + +#include <crypto/aes.h> +#include <crypto/internal/des.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> + +#include "cesa.h" + +struct mv_cesa_des_ctx { + struct mv_cesa_ctx base; + u8 key[DES_KEY_SIZE]; +}; + +struct mv_cesa_des3_ctx { + struct mv_cesa_ctx base; + u8 key[DES3_EDE_KEY_SIZE]; +}; + +struct mv_cesa_aes_ctx { + struct mv_cesa_ctx base; + struct crypto_aes_ctx aes; +}; + +struct mv_cesa_skcipher_dma_iter { + struct mv_cesa_dma_iter base; + struct mv_cesa_sg_dma_iter src; + struct mv_cesa_sg_dma_iter dst; +}; + +static inline void +mv_cesa_skcipher_req_iter_init(struct mv_cesa_skcipher_dma_iter *iter, + struct skcipher_request *req) +{ + mv_cesa_req_dma_iter_init(&iter->base, req->cryptlen); + mv_cesa_sg_dma_iter_init(&iter->src, req->src, DMA_TO_DEVICE); + mv_cesa_sg_dma_iter_init(&iter->dst, req->dst, DMA_FROM_DEVICE); +} + +static inline bool +mv_cesa_skcipher_req_iter_next_op(struct mv_cesa_skcipher_dma_iter *iter) +{ + iter->src.op_offset = 0; + iter->dst.op_offset = 0; + + return mv_cesa_req_dma_iter_next_op(&iter->base); +} + +static inline void +mv_cesa_skcipher_dma_cleanup(struct skcipher_request *req) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + + if (req->dst != req->src) { + dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, + DMA_FROM_DEVICE); + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_TO_DEVICE); + } else { + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_BIDIRECTIONAL); + } + mv_cesa_dma_cleanup(&creq->base); +} + +static inline void mv_cesa_skcipher_cleanup(struct skcipher_request *req) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_skcipher_dma_cleanup(req); +} + +static void mv_cesa_skcipher_std_step(struct skcipher_request *req) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + struct mv_cesa_skcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; + size_t len = min_t(size_t, req->cryptlen - sreq->offset, + CESA_SA_SRAM_PAYLOAD_SIZE); + + mv_cesa_adjust_op(engine, &sreq->op); + if (engine->pool) + memcpy(engine->sram_pool, &sreq->op, sizeof(sreq->op)); + else + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + + len = mv_cesa_sg_copy_to_sram(engine, req->src, creq->src_nents, + CESA_SA_DATA_SRAM_OFFSET, len, + sreq->offset); + + sreq->size = len; + mv_cesa_set_crypt_op_len(&sreq->op, len); + + /* FIXME: only update enc_len field */ + if (!sreq->skip_ctx) { + if (engine->pool) + memcpy(engine->sram_pool, &sreq->op, sizeof(sreq->op)); + else + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + sreq->skip_ctx = true; + } else if (engine->pool) + memcpy(engine->sram_pool, &sreq->op, sizeof(sreq->op.desc)); + else + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op.desc)); + + mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); + writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); + writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); +} + +static int mv_cesa_skcipher_std_process(struct skcipher_request *req, + u32 status) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + struct mv_cesa_skcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; + size_t len; + + len = mv_cesa_sg_copy_from_sram(engine, req->dst, creq->dst_nents, + CESA_SA_DATA_SRAM_OFFSET, sreq->size, + sreq->offset); + + sreq->offset += len; + if (sreq->offset < req->cryptlen) + return -EINPROGRESS; + + return 0; +} + +static int mv_cesa_skcipher_process(struct crypto_async_request *req, + u32 status) +{ + struct skcipher_request *skreq = skcipher_request_cast(req); + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq); + struct mv_cesa_req *basereq = &creq->base; + + if (mv_cesa_req_get_type(basereq) == CESA_STD_REQ) + return mv_cesa_skcipher_std_process(skreq, status); + + return mv_cesa_dma_process(basereq, status); +} + +static void mv_cesa_skcipher_step(struct crypto_async_request *req) +{ + struct skcipher_request *skreq = skcipher_request_cast(req); + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); + else + mv_cesa_skcipher_std_step(skreq); +} + +static inline void +mv_cesa_skcipher_dma_prepare(struct skcipher_request *req) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + struct mv_cesa_req *basereq = &creq->base; + + mv_cesa_dma_prepare(basereq, basereq->engine); +} + +static inline void +mv_cesa_skcipher_std_prepare(struct skcipher_request *req) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + struct mv_cesa_skcipher_std_req *sreq = &creq->std; + + sreq->size = 0; + sreq->offset = 0; +} + +static inline void mv_cesa_skcipher_prepare(struct crypto_async_request *req, + struct mv_cesa_engine *engine) +{ + struct skcipher_request *skreq = skcipher_request_cast(req); + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq); + + creq->base.engine = engine; + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_skcipher_dma_prepare(skreq); + else + mv_cesa_skcipher_std_prepare(skreq); +} + +static inline void +mv_cesa_skcipher_req_cleanup(struct crypto_async_request *req) +{ + struct skcipher_request *skreq = skcipher_request_cast(req); + + mv_cesa_skcipher_cleanup(skreq); +} + +static void +mv_cesa_skcipher_complete(struct crypto_async_request *req) +{ + struct skcipher_request *skreq = skcipher_request_cast(req); + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(skreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int ivsize; + + atomic_sub(skreq->cryptlen, &engine->load); + ivsize = crypto_skcipher_ivsize(crypto_skcipher_reqtfm(skreq)); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { + struct mv_cesa_req *basereq; + + basereq = &creq->base; + memcpy(skreq->iv, basereq->chain.last->op->ctx.skcipher.iv, + ivsize); + } else if (engine->pool) + memcpy(skreq->iv, + engine->sram_pool + CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize); + else + memcpy_fromio(skreq->iv, + engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize); +} + +static const struct mv_cesa_req_ops mv_cesa_skcipher_req_ops = { + .step = mv_cesa_skcipher_step, + .process = mv_cesa_skcipher_process, + .cleanup = mv_cesa_skcipher_req_cleanup, + .complete = mv_cesa_skcipher_complete, +}; + +static void mv_cesa_skcipher_cra_exit(struct crypto_tfm *tfm) +{ + void *ctx = crypto_tfm_ctx(tfm); + + memzero_explicit(ctx, tfm->__crt_alg->cra_ctxsize); +} + +static int mv_cesa_skcipher_cra_init(struct crypto_tfm *tfm) +{ + struct mv_cesa_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->ops = &mv_cesa_skcipher_req_ops; + + crypto_skcipher_set_reqsize(__crypto_skcipher_cast(tfm), + sizeof(struct mv_cesa_skcipher_req)); + + return 0; +} + +static int mv_cesa_aes_setkey(struct crypto_skcipher *cipher, const u8 *key, + unsigned int len) +{ + struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); + struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(tfm); + int remaining; + int offset; + int ret; + int i; + + ret = aes_expandkey(&ctx->aes, key, len); + if (ret) + return ret; + + remaining = (ctx->aes.key_length - 16) / 4; + offset = ctx->aes.key_length + 24 - remaining; + for (i = 0; i < remaining; i++) + ctx->aes.key_dec[4 + i] = ctx->aes.key_enc[offset + i]; + + return 0; +} + +static int mv_cesa_des_setkey(struct crypto_skcipher *cipher, const u8 *key, + unsigned int len) +{ + struct mv_cesa_des_ctx *ctx = crypto_skcipher_ctx(cipher); + int err; + + err = verify_skcipher_des_key(cipher, key); + if (err) + return err; + + memcpy(ctx->key, key, DES_KEY_SIZE); + + return 0; +} + +static int mv_cesa_des3_ede_setkey(struct crypto_skcipher *cipher, + const u8 *key, unsigned int len) +{ + struct mv_cesa_des3_ctx *ctx = crypto_skcipher_ctx(cipher); + int err; + + err = verify_skcipher_des3_key(cipher, key); + if (err) + return err; + + memcpy(ctx->key, key, DES3_EDE_KEY_SIZE); + + return 0; +} + +static int mv_cesa_skcipher_dma_req_init(struct skcipher_request *req, + const struct mv_cesa_op_ctx *op_templ) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + struct mv_cesa_req *basereq = &creq->base; + struct mv_cesa_skcipher_dma_iter iter; + bool skip_ctx = false; + int ret; + + basereq->chain.first = NULL; + basereq->chain.last = NULL; + + if (req->src != req->dst) { + ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_TO_DEVICE); + if (!ret) + return -ENOMEM; + + ret = dma_map_sg(cesa_dev->dev, req->dst, creq->dst_nents, + DMA_FROM_DEVICE); + if (!ret) { + ret = -ENOMEM; + goto err_unmap_src; + } + } else { + ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_BIDIRECTIONAL); + if (!ret) + return -ENOMEM; + } + + mv_cesa_tdma_desc_iter_init(&basereq->chain); + mv_cesa_skcipher_req_iter_init(&iter, req); + + do { + struct mv_cesa_op_ctx *op; + + op = mv_cesa_dma_add_op(&basereq->chain, op_templ, skip_ctx, + flags); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto err_free_tdma; + } + skip_ctx = true; + + mv_cesa_set_crypt_op_len(op, iter.base.op_len); + + /* Add input transfers */ + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, + &iter.src, flags); + if (ret) + goto err_free_tdma; + + /* Add dummy desc to launch the crypto operation */ + ret = mv_cesa_dma_add_dummy_launch(&basereq->chain, flags); + if (ret) + goto err_free_tdma; + + /* Add output transfers */ + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, + &iter.dst, flags); + if (ret) + goto err_free_tdma; + + } while (mv_cesa_skcipher_req_iter_next_op(&iter)); + + /* Add output data for IV */ + ret = mv_cesa_dma_add_result_op(&basereq->chain, + CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); + + if (ret) + goto err_free_tdma; + + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; + + return 0; + +err_free_tdma: + mv_cesa_dma_cleanup(basereq); + if (req->dst != req->src) + dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, + DMA_FROM_DEVICE); + +err_unmap_src: + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, + req->dst != req->src ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); + + return ret; +} + +static inline int +mv_cesa_skcipher_std_req_init(struct skcipher_request *req, + const struct mv_cesa_op_ctx *op_templ) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + struct mv_cesa_skcipher_std_req *sreq = &creq->std; + struct mv_cesa_req *basereq = &creq->base; + + sreq->op = *op_templ; + sreq->skip_ctx = false; + basereq->chain.first = NULL; + basereq->chain.last = NULL; + + return 0; +} + +static int mv_cesa_skcipher_req_init(struct skcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + unsigned int blksize = crypto_skcipher_blocksize(tfm); + int ret; + + if (!IS_ALIGNED(req->cryptlen, blksize)) + return -EINVAL; + + creq->src_nents = sg_nents_for_len(req->src, req->cryptlen); + if (creq->src_nents < 0) { + dev_err(cesa_dev->dev, "Invalid number of src SG"); + return creq->src_nents; + } + creq->dst_nents = sg_nents_for_len(req->dst, req->cryptlen); + if (creq->dst_nents < 0) { + dev_err(cesa_dev->dev, "Invalid number of dst SG"); + return creq->dst_nents; + } + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_OP_CRYPT_ONLY, + CESA_SA_DESC_CFG_OP_MSK); + + if (cesa_dev->caps->has_tdma) + ret = mv_cesa_skcipher_dma_req_init(req, tmpl); + else + ret = mv_cesa_skcipher_std_req_init(req, tmpl); + + return ret; +} + +static int mv_cesa_skcipher_queue_req(struct skcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + int ret; + struct mv_cesa_skcipher_req *creq = skcipher_request_ctx(req); + struct mv_cesa_engine *engine; + + ret = mv_cesa_skcipher_req_init(req, tmpl); + if (ret) + return ret; + + engine = mv_cesa_select_engine(req->cryptlen); + mv_cesa_skcipher_prepare(&req->base, engine); + + ret = mv_cesa_queue_req(&req->base, &creq->base); + + if (mv_cesa_req_needs_cleanup(&req->base, ret)) + mv_cesa_skcipher_cleanup(req); + + return ret; +} + +static int mv_cesa_des_op(struct skcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.skcipher.key, ctx->key, DES_KEY_SIZE); + + return mv_cesa_skcipher_queue_req(req, tmpl); +} + +static int mv_cesa_ecb_des_encrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_des_op(req, &tmpl); +} + +static int mv_cesa_ecb_des_decrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_des_op(req, &tmpl); +} + +struct skcipher_alg mv_cesa_ecb_des_alg = { + .setkey = mv_cesa_des_setkey, + .encrypt = mv_cesa_ecb_des_encrypt, + .decrypt = mv_cesa_ecb_des_decrypt, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .base = { + .cra_name = "ecb(des)", + .cra_driver_name = "mv-ecb-des", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_des_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_skcipher_cra_init, + .cra_exit = mv_cesa_skcipher_cra_exit, + }, +}; + +static int mv_cesa_cbc_des_op(struct skcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTCM_CBC, + CESA_SA_DESC_CFG_CRYPTCM_MSK); + + memcpy(tmpl->ctx.skcipher.iv, req->iv, DES_BLOCK_SIZE); + + return mv_cesa_des_op(req, tmpl); +} + +static int mv_cesa_cbc_des_encrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_cbc_des_op(req, &tmpl); +} + +static int mv_cesa_cbc_des_decrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_cbc_des_op(req, &tmpl); +} + +struct skcipher_alg mv_cesa_cbc_des_alg = { + .setkey = mv_cesa_des_setkey, + .encrypt = mv_cesa_cbc_des_encrypt, + .decrypt = mv_cesa_cbc_des_decrypt, + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .base = { + .cra_name = "cbc(des)", + .cra_driver_name = "mv-cbc-des", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_des_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_skcipher_cra_init, + .cra_exit = mv_cesa_skcipher_cra_exit, + }, +}; + +static int mv_cesa_des3_op(struct skcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.skcipher.key, ctx->key, DES3_EDE_KEY_SIZE); + + return mv_cesa_skcipher_queue_req(req, tmpl); +} + +static int mv_cesa_ecb_des3_ede_encrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_3DES_EDE | + CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_des3_op(req, &tmpl); +} + +static int mv_cesa_ecb_des3_ede_decrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_3DES_EDE | + CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_des3_op(req, &tmpl); +} + +struct skcipher_alg mv_cesa_ecb_des3_ede_alg = { + .setkey = mv_cesa_des3_ede_setkey, + .encrypt = mv_cesa_ecb_des3_ede_encrypt, + .decrypt = mv_cesa_ecb_des3_ede_decrypt, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .base = { + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "mv-ecb-des3-ede", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_des3_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_skcipher_cra_init, + .cra_exit = mv_cesa_skcipher_cra_exit, + }, +}; + +static int mv_cesa_cbc_des3_op(struct skcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + memcpy(tmpl->ctx.skcipher.iv, req->iv, DES3_EDE_BLOCK_SIZE); + + return mv_cesa_des3_op(req, tmpl); +} + +static int mv_cesa_cbc_des3_ede_encrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_CBC | + CESA_SA_DESC_CFG_3DES_EDE | + CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_cbc_des3_op(req, &tmpl); +} + +static int mv_cesa_cbc_des3_ede_decrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_CBC | + CESA_SA_DESC_CFG_3DES_EDE | + CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_cbc_des3_op(req, &tmpl); +} + +struct skcipher_alg mv_cesa_cbc_des3_ede_alg = { + .setkey = mv_cesa_des3_ede_setkey, + .encrypt = mv_cesa_cbc_des3_ede_encrypt, + .decrypt = mv_cesa_cbc_des3_ede_decrypt, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .base = { + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "mv-cbc-des3-ede", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_des3_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_skcipher_cra_init, + .cra_exit = mv_cesa_skcipher_cra_exit, + }, +}; + +static int mv_cesa_aes_op(struct skcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + int i; + u32 *key; + u32 cfg; + + cfg = CESA_SA_DESC_CFG_CRYPTM_AES; + + if (mv_cesa_get_op_cfg(tmpl) & CESA_SA_DESC_CFG_DIR_DEC) + key = ctx->aes.key_dec; + else + key = ctx->aes.key_enc; + + for (i = 0; i < ctx->aes.key_length / sizeof(u32); i++) + tmpl->ctx.skcipher.key[i] = cpu_to_le32(key[i]); + + if (ctx->aes.key_length == 24) + cfg |= CESA_SA_DESC_CFG_AES_LEN_192; + else if (ctx->aes.key_length == 32) + cfg |= CESA_SA_DESC_CFG_AES_LEN_256; + + mv_cesa_update_op_cfg(tmpl, cfg, + CESA_SA_DESC_CFG_CRYPTM_MSK | + CESA_SA_DESC_CFG_AES_LEN_MSK); + + return mv_cesa_skcipher_queue_req(req, tmpl); +} + +static int mv_cesa_ecb_aes_encrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_aes_op(req, &tmpl); +} + +static int mv_cesa_ecb_aes_decrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, + CESA_SA_DESC_CFG_CRYPTCM_ECB | + CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_aes_op(req, &tmpl); +} + +struct skcipher_alg mv_cesa_ecb_aes_alg = { + .setkey = mv_cesa_aes_setkey, + .encrypt = mv_cesa_ecb_aes_encrypt, + .decrypt = mv_cesa_ecb_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .base = { + .cra_name = "ecb(aes)", + .cra_driver_name = "mv-ecb-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_aes_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_skcipher_cra_init, + .cra_exit = mv_cesa_skcipher_cra_exit, + }, +}; + +static int mv_cesa_cbc_aes_op(struct skcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTCM_CBC, + CESA_SA_DESC_CFG_CRYPTCM_MSK); + memcpy(tmpl->ctx.skcipher.iv, req->iv, AES_BLOCK_SIZE); + + return mv_cesa_aes_op(req, tmpl); +} + +static int mv_cesa_cbc_aes_encrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_ENC); + + return mv_cesa_cbc_aes_op(req, &tmpl); +} + +static int mv_cesa_cbc_aes_decrypt(struct skcipher_request *req) +{ + struct mv_cesa_op_ctx tmpl; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_DIR_DEC); + + return mv_cesa_cbc_aes_op(req, &tmpl); +} + +struct skcipher_alg mv_cesa_cbc_aes_alg = { + .setkey = mv_cesa_aes_setkey, + .encrypt = mv_cesa_cbc_aes_encrypt, + .decrypt = mv_cesa_cbc_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_name = "cbc(aes)", + .cra_driver_name = "mv-cbc-aes", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_aes_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = mv_cesa_skcipher_cra_init, + .cra_exit = mv_cesa_skcipher_cra_exit, + }, +}; diff --git a/drivers/crypto/marvell/cesa/hash.c b/drivers/crypto/marvell/cesa/hash.c new file mode 100644 index 0000000000..8d84ad4557 --- /dev/null +++ b/drivers/crypto/marvell/cesa/hash.c @@ -0,0 +1,1457 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Hash algorithms supported by the CESA: MD5, SHA1 and SHA256. + * + * Author: Boris Brezillon <boris.brezillon@free-electrons.com> + * Author: Arnaud Ebalard <arno@natisbad.org> + * + * This work is based on an initial version written by + * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc > + */ + +#include <crypto/hmac.h> +#include <crypto/md5.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> + +#include "cesa.h" + +struct mv_cesa_ahash_dma_iter { + struct mv_cesa_dma_iter base; + struct mv_cesa_sg_dma_iter src; +}; + +static inline void +mv_cesa_ahash_req_iter_init(struct mv_cesa_ahash_dma_iter *iter, + struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int len = req->nbytes + creq->cache_ptr; + + if (!creq->last_req) + len &= ~CESA_HASH_BLOCK_SIZE_MSK; + + mv_cesa_req_dma_iter_init(&iter->base, len); + mv_cesa_sg_dma_iter_init(&iter->src, req->src, DMA_TO_DEVICE); + iter->src.op_offset = creq->cache_ptr; +} + +static inline bool +mv_cesa_ahash_req_iter_next_op(struct mv_cesa_ahash_dma_iter *iter) +{ + iter->src.op_offset = 0; + + return mv_cesa_req_dma_iter_next_op(&iter->base); +} + +static inline int +mv_cesa_ahash_dma_alloc_cache(struct mv_cesa_ahash_dma_req *req, gfp_t flags) +{ + req->cache = dma_pool_alloc(cesa_dev->dma->cache_pool, flags, + &req->cache_dma); + if (!req->cache) + return -ENOMEM; + + return 0; +} + +static inline void +mv_cesa_ahash_dma_free_cache(struct mv_cesa_ahash_dma_req *req) +{ + if (!req->cache) + return; + + dma_pool_free(cesa_dev->dma->cache_pool, req->cache, + req->cache_dma); +} + +static int mv_cesa_ahash_dma_alloc_padding(struct mv_cesa_ahash_dma_req *req, + gfp_t flags) +{ + if (req->padding) + return 0; + + req->padding = dma_pool_alloc(cesa_dev->dma->padding_pool, flags, + &req->padding_dma); + if (!req->padding) + return -ENOMEM; + + return 0; +} + +static void mv_cesa_ahash_dma_free_padding(struct mv_cesa_ahash_dma_req *req) +{ + if (!req->padding) + return; + + dma_pool_free(cesa_dev->dma->padding_pool, req->padding, + req->padding_dma); + req->padding = NULL; +} + +static inline void mv_cesa_ahash_dma_last_cleanup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + mv_cesa_ahash_dma_free_padding(&creq->req.dma); +} + +static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); + mv_cesa_ahash_dma_free_cache(&creq->req.dma); + mv_cesa_dma_cleanup(&creq->base); +} + +static inline void mv_cesa_ahash_cleanup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_ahash_dma_cleanup(req); +} + +static void mv_cesa_ahash_last_cleanup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_ahash_dma_last_cleanup(req); +} + +static int mv_cesa_ahash_pad_len(struct mv_cesa_ahash_req *creq) +{ + unsigned int index, padlen; + + index = creq->len & CESA_HASH_BLOCK_SIZE_MSK; + padlen = (index < 56) ? (56 - index) : (64 + 56 - index); + + return padlen; +} + +static int mv_cesa_ahash_pad_req(struct mv_cesa_ahash_req *creq, u8 *buf) +{ + unsigned int padlen; + + buf[0] = 0x80; + /* Pad out to 56 mod 64 */ + padlen = mv_cesa_ahash_pad_len(creq); + memset(buf + 1, 0, padlen - 1); + + if (creq->algo_le) { + __le64 bits = cpu_to_le64(creq->len << 3); + + memcpy(buf + padlen, &bits, sizeof(bits)); + } else { + __be64 bits = cpu_to_be64(creq->len << 3); + + memcpy(buf + padlen, &bits, sizeof(bits)); + } + + return padlen + 8; +} + +static void mv_cesa_ahash_std_step(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_ahash_std_req *sreq = &creq->req.std; + struct mv_cesa_engine *engine = creq->base.engine; + struct mv_cesa_op_ctx *op; + unsigned int new_cache_ptr = 0; + u32 frag_mode; + size_t len; + unsigned int digsize; + int i; + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + if (engine->pool) + memcpy(engine->sram_pool, &creq->op_tmpl, + sizeof(creq->op_tmpl)); + else + memcpy_toio(engine->sram, &creq->op_tmpl, + sizeof(creq->op_tmpl)); + + if (!sreq->offset) { + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + for (i = 0; i < digsize / 4; i++) + writel_relaxed(creq->state[i], + engine->regs + CESA_IVDIG(i)); + } + + if (creq->cache_ptr) { + if (engine->pool) + memcpy(engine->sram_pool + CESA_SA_DATA_SRAM_OFFSET, + creq->cache, creq->cache_ptr); + else + memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, + creq->cache, creq->cache_ptr); + } + + len = min_t(size_t, req->nbytes + creq->cache_ptr - sreq->offset, + CESA_SA_SRAM_PAYLOAD_SIZE); + + if (!creq->last_req) { + new_cache_ptr = len & CESA_HASH_BLOCK_SIZE_MSK; + len &= ~CESA_HASH_BLOCK_SIZE_MSK; + } + + if (len - creq->cache_ptr) + sreq->offset += mv_cesa_sg_copy_to_sram( + engine, req->src, creq->src_nents, + CESA_SA_DATA_SRAM_OFFSET + creq->cache_ptr, + len - creq->cache_ptr, sreq->offset); + + op = &creq->op_tmpl; + + frag_mode = mv_cesa_get_op_cfg(op) & CESA_SA_DESC_CFG_FRAG_MSK; + + if (creq->last_req && sreq->offset == req->nbytes && + creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) { + if (frag_mode == CESA_SA_DESC_CFG_FIRST_FRAG) + frag_mode = CESA_SA_DESC_CFG_NOT_FRAG; + else if (frag_mode == CESA_SA_DESC_CFG_MID_FRAG) + frag_mode = CESA_SA_DESC_CFG_LAST_FRAG; + } + + if (frag_mode == CESA_SA_DESC_CFG_NOT_FRAG || + frag_mode == CESA_SA_DESC_CFG_LAST_FRAG) { + if (len && + creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX) { + mv_cesa_set_mac_op_total_len(op, creq->len); + } else { + int trailerlen = mv_cesa_ahash_pad_len(creq) + 8; + + if (len + trailerlen > CESA_SA_SRAM_PAYLOAD_SIZE) { + len &= CESA_HASH_BLOCK_SIZE_MSK; + new_cache_ptr = 64 - trailerlen; + if (engine->pool) + memcpy(creq->cache, + engine->sram_pool + + CESA_SA_DATA_SRAM_OFFSET + len, + new_cache_ptr); + else + memcpy_fromio(creq->cache, + engine->sram + + CESA_SA_DATA_SRAM_OFFSET + + len, + new_cache_ptr); + } else { + i = mv_cesa_ahash_pad_req(creq, creq->cache); + len += i; + if (engine->pool) + memcpy(engine->sram_pool + len + + CESA_SA_DATA_SRAM_OFFSET, + creq->cache, i); + else + memcpy_toio(engine->sram + len + + CESA_SA_DATA_SRAM_OFFSET, + creq->cache, i); + } + + if (frag_mode == CESA_SA_DESC_CFG_LAST_FRAG) + frag_mode = CESA_SA_DESC_CFG_MID_FRAG; + else + frag_mode = CESA_SA_DESC_CFG_FIRST_FRAG; + } + } + + mv_cesa_set_mac_op_frag_len(op, len); + mv_cesa_update_op_cfg(op, frag_mode, CESA_SA_DESC_CFG_FRAG_MSK); + + /* FIXME: only update enc_len field */ + if (engine->pool) + memcpy(engine->sram_pool, op, sizeof(*op)); + else + memcpy_toio(engine->sram, op, sizeof(*op)); + + if (frag_mode == CESA_SA_DESC_CFG_FIRST_FRAG) + mv_cesa_update_op_cfg(op, CESA_SA_DESC_CFG_MID_FRAG, + CESA_SA_DESC_CFG_FRAG_MSK); + + creq->cache_ptr = new_cache_ptr; + + mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); + writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); + writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); +} + +static int mv_cesa_ahash_std_process(struct ahash_request *req, u32 status) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_ahash_std_req *sreq = &creq->req.std; + + if (sreq->offset < (req->nbytes - creq->cache_ptr)) + return -EINPROGRESS; + + return 0; +} + +static inline void mv_cesa_ahash_dma_prepare(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_req *basereq = &creq->base; + + mv_cesa_dma_prepare(basereq, basereq->engine); +} + +static void mv_cesa_ahash_std_prepare(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_ahash_std_req *sreq = &creq->req.std; + + sreq->offset = 0; +} + +static void mv_cesa_ahash_dma_step(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_req *base = &creq->base; + + /* We must explicitly set the digest state. */ + if (base->chain.first->flags & CESA_TDMA_SET_STATE) { + struct mv_cesa_engine *engine = base->engine; + int i; + + /* Set the hash state in the IVDIG regs. */ + for (i = 0; i < ARRAY_SIZE(creq->state); i++) + writel_relaxed(creq->state[i], engine->regs + + CESA_IVDIG(i)); + } + + mv_cesa_dma_step(base); +} + +static void mv_cesa_ahash_step(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_ahash_dma_step(ahashreq); + else + mv_cesa_ahash_std_step(ahashreq); +} + +static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + return mv_cesa_dma_process(&creq->base, status); + + return mv_cesa_ahash_std_process(ahashreq, status); +} + +static void mv_cesa_ahash_complete(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int digsize; + int i; + + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ && + (creq->base.chain.last->flags & CESA_TDMA_TYPE_MSK) == + CESA_TDMA_RESULT) { + __le32 *data = NULL; + + /* + * Result is already in the correct endianness when the SA is + * used + */ + data = creq->base.chain.last->op->ctx.hash.hash; + for (i = 0; i < digsize / 4; i++) + creq->state[i] = le32_to_cpu(data[i]); + + memcpy(ahashreq->result, data, digsize); + } else { + for (i = 0; i < digsize / 4; i++) + creq->state[i] = readl_relaxed(engine->regs + + CESA_IVDIG(i)); + if (creq->last_req) { + /* + * Hardware's MD5 digest is in little endian format, but + * SHA in big endian format + */ + if (creq->algo_le) { + __le32 *result = (void *)ahashreq->result; + + for (i = 0; i < digsize / 4; i++) + result[i] = cpu_to_le32(creq->state[i]); + } else { + __be32 *result = (void *)ahashreq->result; + + for (i = 0; i < digsize / 4; i++) + result[i] = cpu_to_be32(creq->state[i]); + } + } + } + + atomic_sub(ahashreq->nbytes, &engine->load); +} + +static void mv_cesa_ahash_prepare(struct crypto_async_request *req, + struct mv_cesa_engine *engine) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + + creq->base.engine = engine; + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_ahash_dma_prepare(ahashreq); + else + mv_cesa_ahash_std_prepare(ahashreq); +} + +static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + + if (creq->last_req) + mv_cesa_ahash_last_cleanup(ahashreq); + + mv_cesa_ahash_cleanup(ahashreq); + + if (creq->cache_ptr) + sg_pcopy_to_buffer(ahashreq->src, creq->src_nents, + creq->cache, + creq->cache_ptr, + ahashreq->nbytes - creq->cache_ptr); +} + +static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { + .step = mv_cesa_ahash_step, + .process = mv_cesa_ahash_process, + .cleanup = mv_cesa_ahash_req_cleanup, + .complete = mv_cesa_ahash_complete, +}; + +static void mv_cesa_ahash_init(struct ahash_request *req, + struct mv_cesa_op_ctx *tmpl, bool algo_le) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + memset(creq, 0, sizeof(*creq)); + mv_cesa_update_op_cfg(tmpl, + CESA_SA_DESC_CFG_OP_MAC_ONLY | + CESA_SA_DESC_CFG_FIRST_FRAG, + CESA_SA_DESC_CFG_OP_MSK | + CESA_SA_DESC_CFG_FRAG_MSK); + mv_cesa_set_mac_op_total_len(tmpl, 0); + mv_cesa_set_mac_op_frag_len(tmpl, 0); + creq->op_tmpl = *tmpl; + creq->len = 0; + creq->algo_le = algo_le; +} + +static inline int mv_cesa_ahash_cra_init(struct crypto_tfm *tfm) +{ + struct mv_cesa_hash_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->base.ops = &mv_cesa_ahash_req_ops; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mv_cesa_ahash_req)); + return 0; +} + +static bool mv_cesa_ahash_cache_req(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + bool cached = false; + + if (creq->cache_ptr + req->nbytes < CESA_MAX_HASH_BLOCK_SIZE && + !creq->last_req) { + cached = true; + + if (!req->nbytes) + return cached; + + sg_pcopy_to_buffer(req->src, creq->src_nents, + creq->cache + creq->cache_ptr, + req->nbytes, 0); + + creq->cache_ptr += req->nbytes; + } + + return cached; +} + +static struct mv_cesa_op_ctx * +mv_cesa_dma_add_frag(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_op_ctx *tmpl, unsigned int frag_len, + gfp_t flags) +{ + struct mv_cesa_op_ctx *op; + int ret; + + op = mv_cesa_dma_add_op(chain, tmpl, false, flags); + if (IS_ERR(op)) + return op; + + /* Set the operation block fragment length. */ + mv_cesa_set_mac_op_frag_len(op, frag_len); + + /* Append dummy desc to launch operation */ + ret = mv_cesa_dma_add_dummy_launch(chain, flags); + if (ret) + return ERR_PTR(ret); + + if (mv_cesa_mac_op_is_first_frag(tmpl)) + mv_cesa_update_op_cfg(tmpl, + CESA_SA_DESC_CFG_MID_FRAG, + CESA_SA_DESC_CFG_FRAG_MSK); + + return op; +} + +static int +mv_cesa_ahash_dma_add_cache(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_ahash_req *creq, + gfp_t flags) +{ + struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; + int ret; + + if (!creq->cache_ptr) + return 0; + + ret = mv_cesa_ahash_dma_alloc_cache(ahashdreq, flags); + if (ret) + return ret; + + memcpy(ahashdreq->cache, creq->cache, creq->cache_ptr); + + return mv_cesa_dma_add_data_transfer(chain, + CESA_SA_DATA_SRAM_OFFSET, + ahashdreq->cache_dma, + creq->cache_ptr, + CESA_TDMA_DST_IN_SRAM, + flags); +} + +static struct mv_cesa_op_ctx * +mv_cesa_ahash_dma_last_req(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_ahash_dma_iter *dma_iter, + struct mv_cesa_ahash_req *creq, + unsigned int frag_len, gfp_t flags) +{ + struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; + unsigned int len, trailerlen, padoff = 0; + struct mv_cesa_op_ctx *op; + int ret; + + /* + * If the transfer is smaller than our maximum length, and we have + * some data outstanding, we can ask the engine to finish the hash. + */ + if (creq->len <= CESA_SA_DESC_MAC_SRC_TOTAL_LEN_MAX && frag_len) { + op = mv_cesa_dma_add_frag(chain, &creq->op_tmpl, frag_len, + flags); + if (IS_ERR(op)) + return op; + + mv_cesa_set_mac_op_total_len(op, creq->len); + mv_cesa_update_op_cfg(op, mv_cesa_mac_op_is_first_frag(op) ? + CESA_SA_DESC_CFG_NOT_FRAG : + CESA_SA_DESC_CFG_LAST_FRAG, + CESA_SA_DESC_CFG_FRAG_MSK); + + ret = mv_cesa_dma_add_result_op(chain, + CESA_SA_CFG_SRAM_OFFSET, + CESA_SA_DATA_SRAM_OFFSET, + CESA_TDMA_SRC_IN_SRAM, flags); + if (ret) + return ERR_PTR(-ENOMEM); + return op; + } + + /* + * The request is longer than the engine can handle, or we have + * no data outstanding. Manually generate the padding, adding it + * as a "mid" fragment. + */ + ret = mv_cesa_ahash_dma_alloc_padding(ahashdreq, flags); + if (ret) + return ERR_PTR(ret); + + trailerlen = mv_cesa_ahash_pad_req(creq, ahashdreq->padding); + + len = min(CESA_SA_SRAM_PAYLOAD_SIZE - frag_len, trailerlen); + if (len) { + ret = mv_cesa_dma_add_data_transfer(chain, + CESA_SA_DATA_SRAM_OFFSET + + frag_len, + ahashdreq->padding_dma, + len, CESA_TDMA_DST_IN_SRAM, + flags); + if (ret) + return ERR_PTR(ret); + + op = mv_cesa_dma_add_frag(chain, &creq->op_tmpl, frag_len + len, + flags); + if (IS_ERR(op)) + return op; + + if (len == trailerlen) + return op; + + padoff += len; + } + + ret = mv_cesa_dma_add_data_transfer(chain, + CESA_SA_DATA_SRAM_OFFSET, + ahashdreq->padding_dma + + padoff, + trailerlen - padoff, + CESA_TDMA_DST_IN_SRAM, + flags); + if (ret) + return ERR_PTR(ret); + + return mv_cesa_dma_add_frag(chain, &creq->op_tmpl, trailerlen - padoff, + flags); +} + +static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + struct mv_cesa_req *basereq = &creq->base; + struct mv_cesa_ahash_dma_iter iter; + struct mv_cesa_op_ctx *op = NULL; + unsigned int frag_len; + bool set_state = false; + int ret; + u32 type; + + basereq->chain.first = NULL; + basereq->chain.last = NULL; + + if (!mv_cesa_mac_op_is_first_frag(&creq->op_tmpl)) + set_state = true; + + if (creq->src_nents) { + ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, + DMA_TO_DEVICE); + if (!ret) { + ret = -ENOMEM; + goto err; + } + } + + mv_cesa_tdma_desc_iter_init(&basereq->chain); + mv_cesa_ahash_req_iter_init(&iter, req); + + /* + * Add the cache (left-over data from a previous block) first. + * This will never overflow the SRAM size. + */ + ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, creq, flags); + if (ret) + goto err_free_tdma; + + if (iter.src.sg) { + /* + * Add all the new data, inserting an operation block and + * launch command between each full SRAM block-worth of + * data. We intentionally do not add the final op block. + */ + while (true) { + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, + &iter.base, + &iter.src, flags); + if (ret) + goto err_free_tdma; + + frag_len = iter.base.op_len; + + if (!mv_cesa_ahash_req_iter_next_op(&iter)) + break; + + op = mv_cesa_dma_add_frag(&basereq->chain, + &creq->op_tmpl, + frag_len, flags); + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto err_free_tdma; + } + } + } else { + /* Account for the data that was in the cache. */ + frag_len = iter.base.op_len; + } + + /* + * At this point, frag_len indicates whether we have any data + * outstanding which needs an operation. Queue up the final + * operation, which depends whether this is the final request. + */ + if (creq->last_req) + op = mv_cesa_ahash_dma_last_req(&basereq->chain, &iter, creq, + frag_len, flags); + else if (frag_len) + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, + frag_len, flags); + + if (IS_ERR(op)) { + ret = PTR_ERR(op); + goto err_free_tdma; + } + + /* + * If results are copied via DMA, this means that this + * request can be directly processed by the engine, + * without partial updates. So we can chain it at the + * DMA level with other requests. + */ + type = basereq->chain.last->flags & CESA_TDMA_TYPE_MSK; + + if (op && type != CESA_TDMA_RESULT) { + /* Add dummy desc to wait for crypto operation end */ + ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags); + if (ret) + goto err_free_tdma; + } + + if (!creq->last_req) + creq->cache_ptr = req->nbytes + creq->cache_ptr - + iter.base.len; + else + creq->cache_ptr = 0; + + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; + + if (type != CESA_TDMA_RESULT) + basereq->chain.last->flags |= CESA_TDMA_BREAK_CHAIN; + + if (set_state) { + /* + * Put the CESA_TDMA_SET_STATE flag on the first tdma desc to + * let the step logic know that the IVDIG registers should be + * explicitly set before launching a TDMA chain. + */ + basereq->chain.first->flags |= CESA_TDMA_SET_STATE; + } + + return 0; + +err_free_tdma: + mv_cesa_dma_cleanup(basereq); + dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); + +err: + mv_cesa_ahash_last_cleanup(req); + + return ret; +} + +static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + creq->src_nents = sg_nents_for_len(req->src, req->nbytes); + if (creq->src_nents < 0) { + dev_err(cesa_dev->dev, "Invalid number of src SG"); + return creq->src_nents; + } + + *cached = mv_cesa_ahash_cache_req(req); + + if (*cached) + return 0; + + if (cesa_dev->caps->has_tdma) + return mv_cesa_ahash_dma_req_init(req); + else + return 0; +} + +static int mv_cesa_ahash_queue_req(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_engine *engine; + bool cached = false; + int ret; + + ret = mv_cesa_ahash_req_init(req, &cached); + if (ret) + return ret; + + if (cached) + return 0; + + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ahash_prepare(&req->base, engine); + + ret = mv_cesa_queue_req(&req->base, &creq->base); + + if (mv_cesa_req_needs_cleanup(&req->base, ret)) + mv_cesa_ahash_cleanup(req); + + return ret; +} + +static int mv_cesa_ahash_update(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + creq->len += req->nbytes; + + return mv_cesa_ahash_queue_req(req); +} + +static int mv_cesa_ahash_final(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; + + mv_cesa_set_mac_op_total_len(tmpl, creq->len); + creq->last_req = true; + req->nbytes = 0; + + return mv_cesa_ahash_queue_req(req); +} + +static int mv_cesa_ahash_finup(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; + + creq->len += req->nbytes; + mv_cesa_set_mac_op_total_len(tmpl, creq->len); + creq->last_req = true; + + return mv_cesa_ahash_queue_req(req); +} + +static int mv_cesa_ahash_export(struct ahash_request *req, void *hash, + u64 *len, void *cache) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int digsize = crypto_ahash_digestsize(ahash); + unsigned int blocksize; + + blocksize = crypto_ahash_blocksize(ahash); + + *len = creq->len; + memcpy(hash, creq->state, digsize); + memset(cache, 0, blocksize); + memcpy(cache, creq->cache, creq->cache_ptr); + + return 0; +} + +static int mv_cesa_ahash_import(struct ahash_request *req, const void *hash, + u64 len, const void *cache) +{ + struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + unsigned int digsize = crypto_ahash_digestsize(ahash); + unsigned int blocksize; + unsigned int cache_ptr; + int ret; + + ret = crypto_ahash_init(req); + if (ret) + return ret; + + blocksize = crypto_ahash_blocksize(ahash); + if (len >= blocksize) + mv_cesa_update_op_cfg(&creq->op_tmpl, + CESA_SA_DESC_CFG_MID_FRAG, + CESA_SA_DESC_CFG_FRAG_MSK); + + creq->len = len; + memcpy(creq->state, hash, digsize); + creq->cache_ptr = 0; + + cache_ptr = do_div(len, blocksize); + if (!cache_ptr) + return 0; + + memcpy(creq->cache, cache, cache_ptr); + creq->cache_ptr = cache_ptr; + + return 0; +} + +static int mv_cesa_md5_init(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_op_ctx tmpl = { }; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_MD5); + + mv_cesa_ahash_init(req, &tmpl, true); + + creq->state[0] = MD5_H0; + creq->state[1] = MD5_H1; + creq->state[2] = MD5_H2; + creq->state[3] = MD5_H3; + + return 0; +} + +static int mv_cesa_md5_export(struct ahash_request *req, void *out) +{ + struct md5_state *out_state = out; + + return mv_cesa_ahash_export(req, out_state->hash, + &out_state->byte_count, out_state->block); +} + +static int mv_cesa_md5_import(struct ahash_request *req, const void *in) +{ + const struct md5_state *in_state = in; + + return mv_cesa_ahash_import(req, in_state->hash, in_state->byte_count, + in_state->block); +} + +static int mv_cesa_md5_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_md5_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_md5_alg = { + .init = mv_cesa_md5_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_md5_digest, + .export = mv_cesa_md5_export, + .import = mv_cesa_md5_import, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + .statesize = sizeof(struct md5_state), + .base = { + .cra_name = "md5", + .cra_driver_name = "mv-md5", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), + .cra_init = mv_cesa_ahash_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_sha1_init(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_op_ctx tmpl = { }; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA1); + + mv_cesa_ahash_init(req, &tmpl, false); + + creq->state[0] = SHA1_H0; + creq->state[1] = SHA1_H1; + creq->state[2] = SHA1_H2; + creq->state[3] = SHA1_H3; + creq->state[4] = SHA1_H4; + + return 0; +} + +static int mv_cesa_sha1_export(struct ahash_request *req, void *out) +{ + struct sha1_state *out_state = out; + + return mv_cesa_ahash_export(req, out_state->state, &out_state->count, + out_state->buffer); +} + +static int mv_cesa_sha1_import(struct ahash_request *req, const void *in) +{ + const struct sha1_state *in_state = in; + + return mv_cesa_ahash_import(req, in_state->state, in_state->count, + in_state->buffer); +} + +static int mv_cesa_sha1_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_sha1_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_sha1_alg = { + .init = mv_cesa_sha1_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_sha1_digest, + .export = mv_cesa_sha1_export, + .import = mv_cesa_sha1_import, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "mv-sha1", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), + .cra_init = mv_cesa_ahash_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_sha256_init(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_op_ctx tmpl = { }; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_SHA256); + + mv_cesa_ahash_init(req, &tmpl, false); + + creq->state[0] = SHA256_H0; + creq->state[1] = SHA256_H1; + creq->state[2] = SHA256_H2; + creq->state[3] = SHA256_H3; + creq->state[4] = SHA256_H4; + creq->state[5] = SHA256_H5; + creq->state[6] = SHA256_H6; + creq->state[7] = SHA256_H7; + + return 0; +} + +static int mv_cesa_sha256_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_sha256_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +static int mv_cesa_sha256_export(struct ahash_request *req, void *out) +{ + struct sha256_state *out_state = out; + + return mv_cesa_ahash_export(req, out_state->state, &out_state->count, + out_state->buf); +} + +static int mv_cesa_sha256_import(struct ahash_request *req, const void *in) +{ + const struct sha256_state *in_state = in; + + return mv_cesa_ahash_import(req, in_state->state, in_state->count, + in_state->buf); +} + +struct ahash_alg mv_sha256_alg = { + .init = mv_cesa_sha256_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_sha256_digest, + .export = mv_cesa_sha256_export, + .import = mv_cesa_sha256_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "mv-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hash_ctx), + .cra_init = mv_cesa_ahash_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_ahmac_iv_state_init(struct ahash_request *req, u8 *pad, + void *state, unsigned int blocksize) +{ + DECLARE_CRYPTO_WAIT(result); + struct scatterlist sg; + int ret; + + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &result); + sg_init_one(&sg, pad, blocksize); + ahash_request_set_crypt(req, &sg, pad, blocksize); + + ret = crypto_ahash_init(req); + if (ret) + return ret; + + ret = crypto_ahash_update(req); + ret = crypto_wait_req(ret, &result); + + if (ret) + return ret; + + ret = crypto_ahash_export(req, state); + if (ret) + return ret; + + return 0; +} + +static int mv_cesa_ahmac_pad_init(struct ahash_request *req, + const u8 *key, unsigned int keylen, + u8 *ipad, u8 *opad, + unsigned int blocksize) +{ + DECLARE_CRYPTO_WAIT(result); + struct scatterlist sg; + int ret; + int i; + + if (keylen <= blocksize) { + memcpy(ipad, key, keylen); + } else { + u8 *keydup = kmemdup(key, keylen, GFP_KERNEL); + + if (!keydup) + return -ENOMEM; + + ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + crypto_req_done, &result); + sg_init_one(&sg, keydup, keylen); + ahash_request_set_crypt(req, &sg, ipad, keylen); + + ret = crypto_ahash_digest(req); + ret = crypto_wait_req(ret, &result); + + /* Set the memory region to 0 to avoid any leak. */ + kfree_sensitive(keydup); + + if (ret) + return ret; + + keylen = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + } + + memset(ipad + keylen, 0, blocksize - keylen); + memcpy(opad, ipad, blocksize); + + for (i = 0; i < blocksize; i++) { + ipad[i] ^= HMAC_IPAD_VALUE; + opad[i] ^= HMAC_OPAD_VALUE; + } + + return 0; +} + +static int mv_cesa_ahmac_setkey(const char *hash_alg_name, + const u8 *key, unsigned int keylen, + void *istate, void *ostate) +{ + struct ahash_request *req; + struct crypto_ahash *tfm; + unsigned int blocksize; + u8 *ipad = NULL; + u8 *opad; + int ret; + + tfm = crypto_alloc_ahash(hash_alg_name, 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + req = ahash_request_alloc(tfm, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto free_ahash; + } + + crypto_ahash_clear_flags(tfm, ~0); + + blocksize = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + + ipad = kcalloc(2, blocksize, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto free_req; + } + + opad = ipad + blocksize; + + ret = mv_cesa_ahmac_pad_init(req, key, keylen, ipad, opad, blocksize); + if (ret) + goto free_ipad; + + ret = mv_cesa_ahmac_iv_state_init(req, ipad, istate, blocksize); + if (ret) + goto free_ipad; + + ret = mv_cesa_ahmac_iv_state_init(req, opad, ostate, blocksize); + +free_ipad: + kfree(ipad); +free_req: + ahash_request_free(req); +free_ahash: + crypto_free_ahash(tfm); + + return ret; +} + +static int mv_cesa_ahmac_cra_init(struct crypto_tfm *tfm) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(tfm); + + ctx->base.ops = &mv_cesa_ahash_req_ops; + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct mv_cesa_ahash_req)); + return 0; +} + +static int mv_cesa_ahmac_md5_init(struct ahash_request *req) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct mv_cesa_op_ctx tmpl = { }; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_MD5); + memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv)); + + mv_cesa_ahash_init(req, &tmpl, true); + + return 0; +} + +static int mv_cesa_ahmac_md5_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct md5_state istate, ostate; + int ret, i; + + ret = mv_cesa_ahmac_setkey("mv-md5", key, keylen, &istate, &ostate); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(istate.hash); i++) + ctx->iv[i] = cpu_to_be32(istate.hash[i]); + + for (i = 0; i < ARRAY_SIZE(ostate.hash); i++) + ctx->iv[i + 8] = cpu_to_be32(ostate.hash[i]); + + return 0; +} + +static int mv_cesa_ahmac_md5_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_ahmac_md5_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_ahmac_md5_alg = { + .init = mv_cesa_ahmac_md5_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_ahmac_md5_digest, + .setkey = mv_cesa_ahmac_md5_setkey, + .export = mv_cesa_md5_export, + .import = mv_cesa_md5_import, + .halg = { + .digestsize = MD5_DIGEST_SIZE, + .statesize = sizeof(struct md5_state), + .base = { + .cra_name = "hmac(md5)", + .cra_driver_name = "mv-hmac-md5", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = MD5_HMAC_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), + .cra_init = mv_cesa_ahmac_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_ahmac_sha1_init(struct ahash_request *req) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct mv_cesa_op_ctx tmpl = { }; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_SHA1); + memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv)); + + mv_cesa_ahash_init(req, &tmpl, false); + + return 0; +} + +static int mv_cesa_ahmac_sha1_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct sha1_state istate, ostate; + int ret, i; + + ret = mv_cesa_ahmac_setkey("mv-sha1", key, keylen, &istate, &ostate); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(istate.state); i++) + ctx->iv[i] = cpu_to_be32(istate.state[i]); + + for (i = 0; i < ARRAY_SIZE(ostate.state); i++) + ctx->iv[i + 8] = cpu_to_be32(ostate.state[i]); + + return 0; +} + +static int mv_cesa_ahmac_sha1_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_ahmac_sha1_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_ahmac_sha1_alg = { + .init = mv_cesa_ahmac_sha1_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_ahmac_sha1_digest, + .setkey = mv_cesa_ahmac_sha1_setkey, + .export = mv_cesa_sha1_export, + .import = mv_cesa_sha1_import, + .halg = { + .digestsize = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), + .base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "mv-hmac-sha1", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), + .cra_init = mv_cesa_ahmac_cra_init, + .cra_module = THIS_MODULE, + } + } +}; + +static int mv_cesa_ahmac_sha256_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct sha256_state istate, ostate; + int ret, i; + + ret = mv_cesa_ahmac_setkey("mv-sha256", key, keylen, &istate, &ostate); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(istate.state); i++) + ctx->iv[i] = cpu_to_be32(istate.state[i]); + + for (i = 0; i < ARRAY_SIZE(ostate.state); i++) + ctx->iv[i + 8] = cpu_to_be32(ostate.state[i]); + + return 0; +} + +static int mv_cesa_ahmac_sha256_init(struct ahash_request *req) +{ + struct mv_cesa_hmac_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct mv_cesa_op_ctx tmpl = { }; + + mv_cesa_set_op_cfg(&tmpl, CESA_SA_DESC_CFG_MACM_HMAC_SHA256); + memcpy(tmpl.ctx.hash.iv, ctx->iv, sizeof(ctx->iv)); + + mv_cesa_ahash_init(req, &tmpl, false); + + return 0; +} + +static int mv_cesa_ahmac_sha256_digest(struct ahash_request *req) +{ + int ret; + + ret = mv_cesa_ahmac_sha256_init(req); + if (ret) + return ret; + + return mv_cesa_ahash_finup(req); +} + +struct ahash_alg mv_ahmac_sha256_alg = { + .init = mv_cesa_ahmac_sha256_init, + .update = mv_cesa_ahash_update, + .final = mv_cesa_ahash_final, + .finup = mv_cesa_ahash_finup, + .digest = mv_cesa_ahmac_sha256_digest, + .setkey = mv_cesa_ahmac_sha256_setkey, + .export = mv_cesa_sha256_export, + .import = mv_cesa_sha256_import, + .halg = { + .digestsize = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), + .base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "mv-hmac-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct mv_cesa_hmac_ctx), + .cra_init = mv_cesa_ahmac_cra_init, + .cra_module = THIS_MODULE, + } + } +}; diff --git a/drivers/crypto/marvell/cesa/tdma.c b/drivers/crypto/marvell/cesa/tdma.c new file mode 100644 index 0000000000..388a06e180 --- /dev/null +++ b/drivers/crypto/marvell/cesa/tdma.c @@ -0,0 +1,402 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Provide TDMA helper functions used by cipher and hash algorithm + * implementations. + * + * Author: Boris Brezillon <boris.brezillon@free-electrons.com> + * Author: Arnaud Ebalard <arno@natisbad.org> + * + * This work is based on an initial version written by + * Sebastian Andrzej Siewior < sebastian at breakpoint dot cc > + */ + +#include "cesa.h" + +bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *iter, + struct mv_cesa_sg_dma_iter *sgiter, + unsigned int len) +{ + if (!sgiter->sg) + return false; + + sgiter->op_offset += len; + sgiter->offset += len; + if (sgiter->offset == sg_dma_len(sgiter->sg)) { + if (sg_is_last(sgiter->sg)) + return false; + sgiter->offset = 0; + sgiter->sg = sg_next(sgiter->sg); + } + + if (sgiter->op_offset == iter->op_len) + return false; + + return true; +} + +void mv_cesa_dma_step(struct mv_cesa_req *dreq) +{ + struct mv_cesa_engine *engine = dreq->engine; + + writel_relaxed(0, engine->regs + CESA_SA_CFG); + + mv_cesa_set_int_mask(engine, CESA_SA_INT_ACC0_IDMA_DONE); + writel_relaxed(CESA_TDMA_DST_BURST_128B | CESA_TDMA_SRC_BURST_128B | + CESA_TDMA_NO_BYTE_SWAP | CESA_TDMA_EN, + engine->regs + CESA_TDMA_CONTROL); + + writel_relaxed(CESA_SA_CFG_ACT_CH0_IDMA | CESA_SA_CFG_MULTI_PKT | + CESA_SA_CFG_CH0_W_IDMA | CESA_SA_CFG_PARA_DIS, + engine->regs + CESA_SA_CFG); + writel_relaxed(dreq->chain.first->cur_dma, + engine->regs + CESA_TDMA_NEXT_ADDR); + WARN_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); + writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); +} + +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq) +{ + struct mv_cesa_tdma_desc *tdma; + + for (tdma = dreq->chain.first; tdma;) { + struct mv_cesa_tdma_desc *old_tdma = tdma; + u32 type = tdma->flags & CESA_TDMA_TYPE_MSK; + + if (type == CESA_TDMA_OP) + dma_pool_free(cesa_dev->dma->op_pool, tdma->op, + le32_to_cpu(tdma->src)); + + tdma = tdma->next; + dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, + old_tdma->cur_dma); + } + + dreq->chain.first = NULL; + dreq->chain.last = NULL; +} + +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, + struct mv_cesa_engine *engine) +{ + struct mv_cesa_tdma_desc *tdma; + + for (tdma = dreq->chain.first; tdma; tdma = tdma->next) { + if (tdma->flags & CESA_TDMA_DST_IN_SRAM) + tdma->dst = cpu_to_le32(tdma->dst_dma + engine->sram_dma); + + if (tdma->flags & CESA_TDMA_SRC_IN_SRAM) + tdma->src = cpu_to_le32(tdma->src_dma + engine->sram_dma); + + if ((tdma->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_OP) + mv_cesa_adjust_op(engine, tdma->op); + } +} + +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq) +{ + if (engine->chain.first == NULL && engine->chain.last == NULL) { + engine->chain.first = dreq->chain.first; + engine->chain.last = dreq->chain.last; + } else { + struct mv_cesa_tdma_desc *last; + + last = engine->chain.last; + last->next = dreq->chain.first; + engine->chain.last = dreq->chain.last; + + /* + * Break the DMA chain if the CESA_TDMA_BREAK_CHAIN is set on + * the last element of the current chain, or if the request + * being queued needs the IV regs to be set before lauching + * the request. + */ + if (!(last->flags & CESA_TDMA_BREAK_CHAIN) && + !(dreq->chain.first->flags & CESA_TDMA_SET_STATE)) + last->next_dma = cpu_to_le32(dreq->chain.first->cur_dma); + } +} + +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req = NULL; + struct mv_cesa_tdma_desc *tdma = NULL, *next = NULL; + dma_addr_t tdma_cur; + int res = 0; + + tdma_cur = readl(engine->regs + CESA_TDMA_CUR); + + for (tdma = engine->chain.first; tdma; tdma = next) { + spin_lock_bh(&engine->lock); + next = tdma->next; + spin_unlock_bh(&engine->lock); + + if (tdma->flags & CESA_TDMA_END_OF_REQ) { + struct crypto_async_request *backlog = NULL; + struct mv_cesa_ctx *ctx; + u32 current_status; + + spin_lock_bh(&engine->lock); + /* + * if req is NULL, this means we're processing the + * request in engine->req. + */ + if (!req) + req = engine->req; + else + req = mv_cesa_dequeue_req_locked(engine, + &backlog); + + /* Re-chaining to the next request */ + engine->chain.first = tdma->next; + tdma->next = NULL; + + /* If this is the last request, clear the chain */ + if (engine->chain.first == NULL) + engine->chain.last = NULL; + spin_unlock_bh(&engine->lock); + + ctx = crypto_tfm_ctx(req->tfm); + current_status = (tdma->cur_dma == tdma_cur) ? + status : CESA_SA_INT_ACC0_IDMA_DONE; + res = ctx->ops->process(req, current_status); + ctx->ops->complete(req); + + if (res == 0) + mv_cesa_engine_enqueue_complete_request(engine, + req); + + if (backlog) + crypto_request_complete(backlog, -EINPROGRESS); + } + + if (res || tdma->cur_dma == tdma_cur) + break; + } + + /* + * Save the last request in error to engine->req, so that the core + * knows which request was faulty + */ + if (res) { + spin_lock_bh(&engine->lock); + engine->req = req; + spin_unlock_bh(&engine->lock); + } + + return res; +} + +static struct mv_cesa_tdma_desc * +mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) +{ + struct mv_cesa_tdma_desc *new_tdma = NULL; + dma_addr_t dma_handle; + + new_tdma = dma_pool_zalloc(cesa_dev->dma->tdma_desc_pool, flags, + &dma_handle); + if (!new_tdma) + return ERR_PTR(-ENOMEM); + + new_tdma->cur_dma = dma_handle; + if (chain->last) { + chain->last->next_dma = cpu_to_le32(dma_handle); + chain->last->next = new_tdma; + } else { + chain->first = new_tdma; + } + + chain->last = new_tdma; + + return new_tdma; +} + +int mv_cesa_dma_add_result_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags) +{ + struct mv_cesa_tdma_desc *tdma, *op_desc; + + tdma = mv_cesa_dma_add_desc(chain, gfp_flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + /* We re-use an existing op_desc object to retrieve the context + * and result instead of allocating a new one. + * There is at least one object of this type in a CESA crypto + * req, just pick the first one in the chain. + */ + for (op_desc = chain->first; op_desc; op_desc = op_desc->next) { + u32 type = op_desc->flags & CESA_TDMA_TYPE_MSK; + + if (type == CESA_TDMA_OP) + break; + } + + if (!op_desc) + return -EIO; + + tdma->byte_cnt = cpu_to_le32(size | BIT(31)); + tdma->src_dma = src; + tdma->dst_dma = op_desc->src_dma; + tdma->op = op_desc->op; + + flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); + tdma->flags = flags | CESA_TDMA_RESULT; + return 0; +} + +struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, + const struct mv_cesa_op_ctx *op_templ, + bool skip_ctx, + gfp_t flags) +{ + struct mv_cesa_tdma_desc *tdma; + struct mv_cesa_op_ctx *op; + dma_addr_t dma_handle; + unsigned int size; + + tdma = mv_cesa_dma_add_desc(chain, flags); + if (IS_ERR(tdma)) + return ERR_CAST(tdma); + + op = dma_pool_alloc(cesa_dev->dma->op_pool, flags, &dma_handle); + if (!op) + return ERR_PTR(-ENOMEM); + + *op = *op_templ; + + size = skip_ctx ? sizeof(op->desc) : sizeof(*op); + + tdma = chain->last; + tdma->op = op; + tdma->byte_cnt = cpu_to_le32(size | BIT(31)); + tdma->src = cpu_to_le32(dma_handle); + tdma->dst_dma = CESA_SA_CFG_SRAM_OFFSET; + tdma->flags = CESA_TDMA_DST_IN_SRAM | CESA_TDMA_OP; + + return op; +} + +int mv_cesa_dma_add_data_transfer(struct mv_cesa_tdma_chain *chain, + dma_addr_t dst, dma_addr_t src, u32 size, + u32 flags, gfp_t gfp_flags) +{ + struct mv_cesa_tdma_desc *tdma; + + tdma = mv_cesa_dma_add_desc(chain, gfp_flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + tdma->byte_cnt = cpu_to_le32(size | BIT(31)); + tdma->src_dma = src; + tdma->dst_dma = dst; + + flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); + tdma->flags = flags | CESA_TDMA_DATA; + + return 0; +} + +int mv_cesa_dma_add_dummy_launch(struct mv_cesa_tdma_chain *chain, gfp_t flags) +{ + struct mv_cesa_tdma_desc *tdma; + + tdma = mv_cesa_dma_add_desc(chain, flags); + return PTR_ERR_OR_ZERO(tdma); +} + +int mv_cesa_dma_add_dummy_end(struct mv_cesa_tdma_chain *chain, gfp_t flags) +{ + struct mv_cesa_tdma_desc *tdma; + + tdma = mv_cesa_dma_add_desc(chain, flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + tdma->byte_cnt = cpu_to_le32(BIT(31)); + + return 0; +} + +int mv_cesa_dma_add_op_transfers(struct mv_cesa_tdma_chain *chain, + struct mv_cesa_dma_iter *dma_iter, + struct mv_cesa_sg_dma_iter *sgiter, + gfp_t gfp_flags) +{ + u32 flags = sgiter->dir == DMA_TO_DEVICE ? + CESA_TDMA_DST_IN_SRAM : CESA_TDMA_SRC_IN_SRAM; + unsigned int len; + + do { + dma_addr_t dst, src; + int ret; + + len = mv_cesa_req_dma_iter_transfer_len(dma_iter, sgiter); + if (sgiter->dir == DMA_TO_DEVICE) { + dst = CESA_SA_DATA_SRAM_OFFSET + sgiter->op_offset; + src = sg_dma_address(sgiter->sg) + sgiter->offset; + } else { + dst = sg_dma_address(sgiter->sg) + sgiter->offset; + src = CESA_SA_DATA_SRAM_OFFSET + sgiter->op_offset; + } + + ret = mv_cesa_dma_add_data_transfer(chain, dst, src, len, + flags, gfp_flags); + if (ret) + return ret; + + } while (mv_cesa_req_dma_iter_next_transfer(dma_iter, sgiter, len)); + + return 0; +} + +size_t mv_cesa_sg_copy(struct mv_cesa_engine *engine, + struct scatterlist *sgl, unsigned int nents, + unsigned int sram_off, size_t buflen, off_t skip, + bool to_sram) +{ + unsigned int sg_flags = SG_MITER_ATOMIC; + struct sg_mapping_iter miter; + unsigned int offset = 0; + + if (to_sram) + sg_flags |= SG_MITER_FROM_SG; + else + sg_flags |= SG_MITER_TO_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + if (!sg_miter_skip(&miter, skip)) + return 0; + + while ((offset < buflen) && sg_miter_next(&miter)) { + unsigned int len; + + len = min(miter.length, buflen - offset); + + if (to_sram) { + if (engine->pool) + memcpy(engine->sram_pool + sram_off + offset, + miter.addr, len); + else + memcpy_toio(engine->sram + sram_off + offset, + miter.addr, len); + } else { + if (engine->pool) + memcpy(miter.addr, + engine->sram_pool + sram_off + offset, + len); + else + memcpy_fromio(miter.addr, + engine->sram + sram_off + offset, + len); + } + + offset += len; + } + + sg_miter_stop(&miter); + + return offset; +} diff --git a/drivers/crypto/marvell/octeontx/Makefile b/drivers/crypto/marvell/octeontx/Makefile new file mode 100644 index 0000000000..5e956fe1a8 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX_CPT) += octeontx-cpt.o octeontx-cptvf.o + +octeontx-cpt-objs := otx_cptpf_main.o otx_cptpf_mbox.o otx_cptpf_ucode.o +octeontx-cptvf-objs := otx_cptvf_main.o otx_cptvf_mbox.o otx_cptvf_reqmgr.o \ + otx_cptvf_algs.o diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_common.h b/drivers/crypto/marvell/octeontx/otx_cpt_common.h new file mode 100644 index 0000000000..ca704a7a26 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_common.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_COMMON_H +#define __OTX_CPT_COMMON_H + +#include <linux/types.h> +#include <linux/delay.h> +#include <linux/device.h> + +#define OTX_CPT_MAX_MBOX_DATA_STR_SIZE 64 + +enum otx_cptpf_type { + OTX_CPT_AE = 2, + OTX_CPT_SE = 3, + BAD_OTX_CPTPF_TYPE, +}; + +enum otx_cptvf_type { + OTX_CPT_AE_TYPES = 1, + OTX_CPT_SE_TYPES = 2, + BAD_OTX_CPTVF_TYPE, +}; + +/* VF-PF message opcodes */ +enum otx_cpt_mbox_opcode { + OTX_CPT_MSG_VF_UP = 1, + OTX_CPT_MSG_VF_DOWN, + OTX_CPT_MSG_READY, + OTX_CPT_MSG_QLEN, + OTX_CPT_MSG_QBIND_GRP, + OTX_CPT_MSG_VQ_PRIORITY, + OTX_CPT_MSG_PF_TYPE, + OTX_CPT_MSG_ACK, + OTX_CPT_MSG_NACK +}; + +/* OcteonTX CPT mailbox structure */ +struct otx_cpt_mbox { + u64 msg; /* Message type MBOX[0] */ + u64 data;/* Data MBOX[1] */ +}; + +#endif /* __OTX_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h new file mode 100644 index 0000000000..f8aedafdfd --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -0,0 +1,824 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_HW_TYPES_H +#define __OTX_CPT_HW_TYPES_H + +#include <linux/types.h> + +/* Device IDs */ +#define OTX_CPT_PCI_PF_DEVICE_ID 0xa040 +#define OTX_CPT_PCI_VF_DEVICE_ID 0xa041 + +#define OTX_CPT_PCI_PF_SUBSYS_ID 0xa340 +#define OTX_CPT_PCI_VF_SUBSYS_ID 0xa341 + +/* Configuration and status registers are in BAR0 on OcteonTX platform */ +#define OTX_CPT_PF_PCI_CFG_BAR 0 +#define OTX_CPT_VF_PCI_CFG_BAR 0 + +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_OFFSET(a, b) \ + (0x000020000000ll + 0x1000000000ll * (a) + 0x100000ll * (b)) +#define OTX_CPT_BAR_E_CPTX_VFX_BAR0_SIZE 0x400000 + +/* Mailbox interrupts offset */ +#define OTX_CPT_PF_MBOX_INT 3 +#define OTX_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) +/* Number of MSIX supported in PF */ +#define OTX_CPT_PF_MSIX_VECTORS 4 +/* Maximum supported microcode groups */ +#define OTX_CPT_MAX_ENGINE_GROUPS 8 + +/* CPT instruction size in bytes */ +#define OTX_CPT_INST_SIZE 64 +/* CPT queue next chunk pointer size in bytes */ +#define OTX_CPT_NEXT_CHUNK_PTR_SIZE 8 + +/* OcteonTX CPT VF MSIX vectors and their offsets */ +#define OTX_CPT_VF_MSIX_VECTORS 2 +#define OTX_CPT_VF_INTR_MBOX_MASK BIT(0) +#define OTX_CPT_VF_INTR_DOVF_MASK BIT(1) +#define OTX_CPT_VF_INTR_IRDE_MASK BIT(2) +#define OTX_CPT_VF_INTR_NWRP_MASK BIT(3) +#define OTX_CPT_VF_INTR_SERR_MASK BIT(4) + +/* OcteonTX CPT PF registers */ +#define OTX_CPT_PF_CONSTANTS (0x0ll) +#define OTX_CPT_PF_RESET (0x100ll) +#define OTX_CPT_PF_DIAG (0x120ll) +#define OTX_CPT_PF_BIST_STATUS (0x160ll) +#define OTX_CPT_PF_ECC0_CTL (0x200ll) +#define OTX_CPT_PF_ECC0_FLIP (0x210ll) +#define OTX_CPT_PF_ECC0_INT (0x220ll) +#define OTX_CPT_PF_ECC0_INT_W1S (0x230ll) +#define OTX_CPT_PF_ECC0_ENA_W1S (0x240ll) +#define OTX_CPT_PF_ECC0_ENA_W1C (0x250ll) +#define OTX_CPT_PF_MBOX_INTX(b) (0x400ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_INT_W1SX(b) (0x420ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1CX(b) (0x440ll | (u64)(b) << 3) +#define OTX_CPT_PF_MBOX_ENA_W1SX(b) (0x460ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INT (0x500ll) +#define OTX_CPT_PF_EXEC_INT_W1S (0x520ll) +#define OTX_CPT_PF_EXEC_ENA_W1C (0x540ll) +#define OTX_CPT_PF_EXEC_ENA_W1S (0x560ll) +#define OTX_CPT_PF_GX_EN(b) (0x600ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXEC_INFO (0x700ll) +#define OTX_CPT_PF_EXEC_BUSY (0x800ll) +#define OTX_CPT_PF_EXEC_INFO0 (0x900ll) +#define OTX_CPT_PF_EXEC_INFO1 (0x910ll) +#define OTX_CPT_PF_INST_REQ_PC (0x10000ll) +#define OTX_CPT_PF_INST_LATENCY_PC (0x10020ll) +#define OTX_CPT_PF_RD_REQ_PC (0x10040ll) +#define OTX_CPT_PF_RD_LATENCY_PC (0x10060ll) +#define OTX_CPT_PF_RD_UC_PC (0x10080ll) +#define OTX_CPT_PF_ACTIVE_CYCLES_PC (0x10100ll) +#define OTX_CPT_PF_EXE_CTL (0x4000000ll) +#define OTX_CPT_PF_EXE_STATUS (0x4000008ll) +#define OTX_CPT_PF_EXE_CLK (0x4000010ll) +#define OTX_CPT_PF_EXE_DBG_CTL (0x4000018ll) +#define OTX_CPT_PF_EXE_DBG_DATA (0x4000020ll) +#define OTX_CPT_PF_EXE_BIST_STATUS (0x4000028ll) +#define OTX_CPT_PF_EXE_REQ_TIMER (0x4000030ll) +#define OTX_CPT_PF_EXE_MEM_CTL (0x4000038ll) +#define OTX_CPT_PF_EXE_PERF_CTL (0x4001000ll) +#define OTX_CPT_PF_EXE_DBG_CNTX(b) (0x4001100ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_PERF_EVENT_CNT (0x4001180ll) +#define OTX_CPT_PF_EXE_EPCI_INBX_CNT(b) (0x4001200ll | (u64)(b) << 3) +#define OTX_CPT_PF_EXE_EPCI_OUTBX_CNT(b) (0x4001240ll | (u64)(b) << 3) +#define OTX_CPT_PF_ENGX_UCODE_BASE(b) (0x4002000ll | (u64)(b) << 3) +#define OTX_CPT_PF_QX_CTL(b) (0x8000000ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_GMCTL(b) (0x8000020ll | (u64)(b) << 20) +#define OTX_CPT_PF_QX_CTL2(b) (0x8000100ll | (u64)(b) << 20) +#define OTX_CPT_PF_VFX_MBOXX(b, c) (0x8001000ll | (u64)(b) << 20 | \ + (u64)(c) << 8) + +/* OcteonTX CPT VF registers */ +#define OTX_CPT_VQX_CTL(b) (0x100ll | (u64)(b) << 20) +#define OTX_CPT_VQX_SADDR(b) (0x200ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_WAIT(b) (0x400ll | (u64)(b) << 20) +#define OTX_CPT_VQX_INPROG(b) (0x410ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE(b) (0x420ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ACK(b) (0x440ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1S(b) (0x460ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_INT_W1C(b) (0x468ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1S(b) (0x470ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DONE_ENA_W1C(b) (0x478ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT(b) (0x500ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_INT_W1S(b) (0x508ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1S(b) (0x510ll | (u64)(b) << 20) +#define OTX_CPT_VQX_MISC_ENA_W1C(b) (0x518ll | (u64)(b) << 20) +#define OTX_CPT_VQX_DOORBELL(b) (0x600ll | (u64)(b) << 20) +#define OTX_CPT_VFX_PF_MBOXX(b, c) (0x1000ll | ((b) << 20) | ((c) << 3)) + +/* + * Enumeration otx_cpt_ucode_error_code_e + * + * Enumerates ucode errors + */ +enum otx_cpt_ucode_error_code_e { + CPT_NO_UCODE_ERROR = 0x00, + ERR_OPCODE_UNSUPPORTED = 0x01, + + /* Scatter gather */ + ERR_SCATTER_GATHER_WRITE_LENGTH = 0x02, + ERR_SCATTER_GATHER_LIST = 0x03, + ERR_SCATTER_GATHER_NOT_SUPPORTED = 0x04, + +}; + +/* + * Enumeration otx_cpt_comp_e + * + * CPT OcteonTX Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum otx_cpt_comp_e { + CPT_COMP_E_NOTDONE = 0x00, + CPT_COMP_E_GOOD = 0x01, + CPT_COMP_E_FAULT = 0x02, + CPT_COMP_E_SWERR = 0x03, + CPT_COMP_E_HWERR = 0x04, + CPT_COMP_E_LAST_ENTRY = 0x05 +}; + +/* + * Enumeration otx_cpt_vf_int_vec_e + * + * CPT OcteonTX VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx_cpt_vf_int_vec_e { + CPT_VF_INT_VEC_E_MISC = 0x00, + CPT_VF_INT_VEC_E_DONE = 0x01 +}; + +/* + * Structure cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented,and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. + * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 1. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 1. Passed to the AE/SE. + * + */ +union otx_cpt_inst_s { + u64 u[8]; + + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_0_15:16; +#else /* Word 0 - Little Endian */ + u64 reserved_0_15:16; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 res_addr; +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 2 - Big Endian */ + u64 reserved_172_191:20; + u64 grp:10; + u64 tt:2; + u64 tag:32; +#else /* Word 2 - Little Endian */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_191:20; +#endif /* Word 2 - End */ + u64 wq_ptr; + u64 ei0; + u64 ei1; + u64 ei2; + u64 ei3; + } s; +}; + +/* + * Structure cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. + * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case,the CPT + * coprocessor will have also completed L2/DRAM write operations. + * Word 1 + * reserved + * + */ +union otx_cpt_res_s { + u64 u[2]; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_17_63:47; + u64 doneint:1; + u64 reserved_8_15:8; + u64 compcode:8; +#else /* Word 0 - Little Endian */ + u64 compcode:8; + u64 reserved_8_15:8; + u64 doneint:1; + u64 reserved_17_63:47; +#endif /* Word 0 - End */ + u64 reserved_64_127; + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_bist_status + * + * CPT PF Control Bist Status Register + * This register has the BIST status of memories. Each bit is the BIST result + * of an individual memory (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_bist_status_s + * Word0 + * bstatus [29:0](RO/H) BIST status. One bit per memory, enumerated by + * CPT_RAMS_E. + */ +union otx_cptx_pf_bist_status { + u64 u; + struct otx_cptx_pf_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_30_63:34; + u64 bstatus:30; +#else /* Word 0 - Little Endian */ + u64 bstatus:30; + u64 reserved_30_63:34; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_constants + * + * CPT PF Constants Register + * This register contains implementation-related parameters of CPT in CNXXXX. + * otx_cptx_pf_constants_s + * Word 0 + * reserved_40_63:24 [63:40] Reserved. + * epcis:8 [39:32](RO) Number of EPCI busses. + * grps:8 [31:24](RO) Number of engine groups implemented. + * ae:8 [23:16](RO/H) Number of AEs. In CNXXXX, for CPT0 returns 0x0, + * for CPT1 returns 0x18, or less if there are fuse-disables. + * se:8 [15:8](RO/H) Number of SEs. In CNXXXX, for CPT0 returns 0x30, + * or less if there are fuse-disables, for CPT1 returns 0x0. + * vq:8 [7:0](RO) Number of VQs. + */ +union otx_cptx_pf_constants { + u64 u; + struct otx_cptx_pf_constants_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_40_63:24; + u64 epcis:8; + u64 grps:8; + u64 ae:8; + u64 se:8; + u64 vq:8; +#else /* Word 0 - Little Endian */ + u64 vq:8; + u64 se:8; + u64 ae:8; + u64 grps:8; + u64 epcis:8; + u64 reserved_40_63:24; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_exe_bist_status + * + * CPT PF Engine Bist Status Register + * This register has the BIST status of each engine. Each bit is the + * BIST result of an individual engine (per bit, 0 = pass and 1 = fail). + * otx_cptx_pf_exe_bist_status_s + * Word0 + * reserved_48_63:16 [63:48] reserved + * bstatus:48 [47:0](RO/H) BIST status. One bit per engine. + * + */ +union otx_cptx_pf_exe_bist_status { + u64 u; + struct otx_cptx_pf_exe_bist_status_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 bstatus:48; +#else /* Word 0 - Little Endian */ + u64 bstatus:48; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_pf_q#_ctl + * + * CPT Queue Control Register + * This register configures queues. This register should be changed only + * when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * otx_cptx_pf_qx_ctl_s + * Word0 + * reserved_60_63:4 [63:60] reserved. + * aura:12; [59:48](R/W) Guest-aura for returning this queue's + * instruction-chunk buffers to FPA. Only used when [INST_FREE] is set. + * For the FPA to not discard the request, FPA_PF_MAP() must map + * [AURA] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * reserved_45_47:3 [47:45] reserved. + * size:13 [44:32](R/W) Command-buffer size, in number of 64-bit words per + * command buffer segment. Must be 8*n + 1, where n is the number of + * instructions per buffer segment. + * reserved_11_31:21 [31:11] Reserved. + * cont_err:1 [10:10](R/W) Continue on error. + * 0 = When CPT()_VQ()_MISC_INT[NWRP], CPT()_VQ()_MISC_INT[IRDE] or + * CPT()_VQ()_MISC_INT[DOVF] are set by hardware or software via + * CPT()_VQ()_MISC_INT_W1S, then CPT()_VQ()_CTL[ENA] is cleared. Due to + * pipelining, additional instructions may have been processed between the + * instruction causing the error and the next instruction in the disabled + * queue (the instruction at CPT()_VQ()_SADDR). + * 1 = Ignore errors and continue processing instructions. + * For diagnostic use only. + * inst_free:1 [9:9](R/W) Instruction FPA free. When set, when CPT reaches the + * end of an instruction chunk, that chunk will be freed to the FPA. + * inst_be:1 [8:8](R/W) Instruction big-endian control. When set, instructions, + * instruction next chunk pointers, and result structures are stored in + * big-endian format in memory. + * iqb_ldwb:1 [7:7](R/W) Instruction load don't write back. + * 0 = The hardware issues NCB transient load (LDT) towards the cache, + * which if the line hits and is dirty will cause the line to be + * written back before being replaced. + * 1 = The hardware issues NCB LDWB read-and-invalidate command towards + * the cache when fetching the last word of instructions; as a result the + * line will not be written back when replaced. This improves + * performance, but software must not read the instructions after they are + * posted to the hardware. Reads that do not consume the last word of a + * cache line always use LDI. + * reserved_4_6:3 [6:4] Reserved. + * grp:3; [3:1](R/W) Engine group. + * pri:1; [0:0](R/W) Queue priority. + * 1 = This queue has higher priority. Round-robin between higher + * priority queues. + * 0 = This queue has lower priority. Round-robin between lower + * priority queues. + */ +union otx_cptx_pf_qx_ctl { + u64 u; + struct otx_cptx_pf_qx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_60_63:4; + u64 aura:12; + u64 reserved_45_47:3; + u64 size:13; + u64 reserved_11_31:21; + u64 cont_err:1; + u64 inst_free:1; + u64 inst_be:1; + u64 iqb_ldwb:1; + u64 reserved_4_6:3; + u64 grp:3; + u64 pri:1; +#else /* Word 0 - Little Endian */ + u64 pri:1; + u64 grp:3; + u64 reserved_4_6:3; + u64 iqb_ldwb:1; + u64 inst_be:1; + u64 inst_free:1; + u64 cont_err:1; + u64 reserved_11_31:21; + u64 size:13; + u64 reserved_45_47:3; + u64 aura:12; + u64 reserved_60_63:4; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_saddr + * + * CPT Queue Starting Buffer Address Registers + * These registers set the instruction buffer starting address. + * otx_cptx_vqx_saddr_s + * Word0 + * reserved_49_63:15 [63:49] Reserved. + * ptr:43 [48:6](R/W/H) Instruction buffer IOVA <48:6> (64-byte aligned). + * When written, it is the initial buffer starting address; when read, + * it is the next read pointer to be requested from L2C. The PTR field + * is overwritten with the next pointer each time that the command buffer + * segment is exhausted. New commands will then be read from the newly + * specified command buffer pointer. + * reserved_0_5:6 [5:0] Reserved. + * + */ +union otx_cptx_vqx_saddr { + u64 u; + struct otx_cptx_vqx_saddr_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_49_63:15; + u64 ptr:43; + u64 reserved_0_5:6; +#else /* Word 0 - Little Endian */ + u64 reserved_0_5:6; + u64 ptr:43; + u64 reserved_49_63:15; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_ena_w1s + * + * CPT Queue Misc Interrupt Enable Set Register + * This register sets interrupt enable bits. + * otx_cptx_vqx_misc_ena_w1s_s + * Word0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[SWERR]. + * nwrp:1 [3:3](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[NWRP]. + * irde:1 [2:2](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[IRDE]. + * dovf:1 [1:1](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[DOVF]. + * mbox:1 [0:0](R/W1S/H) Reads or sets enable for + * CPT(0..1)_VQ(0..63)_MISC_INT[MBOX]. + * + */ +union otx_cptx_vqx_misc_ena_w1s { + u64 u; + struct otx_cptx_vqx_misc_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_doorbell + * + * CPT Queue Doorbell Registers + * Doorbells for the CPT instruction queues. + * otx_cptx_vqx_doorbell_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add + * to the CPT instruction doorbell count. Readback value is the + * current number of pending doorbell requests. If counter overflows + * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to + * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], + * then write a value of 2^20 minus the read [DBELL_CNT], then write one + * to CPT()_VQ()_MISC_INT_W1C[DBELL_DOVF] and + * CPT()_VQ()_MISC_INT_ENA_W1S[DBELL_DOVF]. Must be a multiple of 8. + * All CPT instructions are 8 words and require a doorbell count of + * multiple of 8. + */ +union otx_cptx_vqx_doorbell { + u64 u; + struct otx_cptx_vqx_doorbell_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 dbell_cnt:20; +#else /* Word 0 - Little Endian */ + u64 dbell_cnt:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_inprog + * + * CPT Queue In Progress Count Registers + * These registers contain the per-queue instruction in flight registers. + * otx_cptx_vqx_inprog_s + * Word0 + * reserved_8_63:56 [63:8] Reserved. + * inflight:8 [7:0](RO/H) Inflight count. Counts the number of instructions + * for the VF for which CPT is fetching, executing or responding to + * instructions. However this does not include any interrupts that are + * awaiting software handling (CPT()_VQ()_DONE[DONE] != 0x0). + * A queue may not be reconfigured until: + * 1. CPT()_VQ()_CTL[ENA] is cleared by software. + * 2. [INFLIGHT] is polled until equals to zero. + */ +union otx_cptx_vqx_inprog { + u64 u; + struct otx_cptx_vqx_inprog_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_8_63:56; + u64 inflight:8; +#else /* Word 0 - Little Endian */ + u64 inflight:8; + u64 reserved_8_63:56; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_misc_int + * + * CPT Queue Misc Interrupt Register + * These registers contain the per-queue miscellaneous interrupts. + * otx_cptx_vqx_misc_int_s + * Word 0 + * reserved_5_63:59 [63:5] Reserved. + * swerr:1 [4:4](R/W1C/H) Software error from engines. + * nwrp:1 [3:3](R/W1C/H) NCB result write response error. + * irde:1 [2:2](R/W1C/H) Instruction NCB read response error. + * dovf:1 [1:1](R/W1C/H) Doorbell overflow. + * mbox:1 [0:0](R/W1C/H) PF to VF mailbox interrupt. Set when + * CPT()_VF()_PF_MBOX(0) is written. + * + */ +union otx_cptx_vqx_misc_int { + u64 u; + struct otx_cptx_vqx_misc_int_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_5_63:59; + u64 swerr:1; + u64 nwrp:1; + u64 irde:1; + u64 dovf:1; + u64 mbox:1; +#else /* Word 0 - Little Endian */ + u64 mbox:1; + u64 dovf:1; + u64 irde:1; + u64 nwrp:1; + u64 swerr:1; + u64 reserved_5_63:59; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ack + * + * CPT Queue Done Count Ack Registers + * This register is written by software to acknowledge interrupts. + * otx_cptx_vqx_done_ack_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done_ack:20 [19:0](R/W/H) Number of decrements to CPT()_VQ()_DONE[DONE]. + * Reads CPT()_VQ()_DONE[DONE]. Written by software to acknowledge + * interrupts. If CPT()_VQ()_DONE[DONE] is still nonzero the interrupt + * will be re-sent if the conditions described in CPT()_VQ()_DONE[DONE] + * are satisfied. + * + */ +union otx_cptx_vqx_done_ack { + u64 u; + struct otx_cptx_vqx_done_ack_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done_ack:20; +#else /* Word 0 - Little Endian */ + u64 done_ack:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done + * + * CPT Queue Done Count Registers + * These registers contain the per-queue instruction done count. + * cptx_vqx_done_s + * Word0 + * reserved_20_63:44 [63:20] Reserved. + * done:20 [19:0](R/W/H) Done count. When CPT_INST_S[DONEINT] set and that + * instruction completes, CPT()_VQ()_DONE[DONE] is incremented when the + * instruction finishes. Write to this field are for diagnostic use only; + * instead software writes CPT()_VQ()_DONE_ACK with the number of + * decrements for this field. + * Interrupts are sent as follows: + * * When CPT()_VQ()_DONE[DONE] = 0, then no results are pending, the + * interrupt coalescing timer is held to zero, and an interrupt is not + * sent. + * * When CPT()_VQ()_DONE[DONE] != 0, then the interrupt coalescing timer + * counts. If the counter is >= CPT()_VQ()_DONE_WAIT[TIME_WAIT]*1024, or + * CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT], i.e. enough + * time has passed or enough results have arrived, then the interrupt is + * sent. + * * When CPT()_VQ()_DONE_ACK is written (or CPT()_VQ()_DONE is written + * but this is not typical), the interrupt coalescing timer restarts. + * Note after decrementing this interrupt equation is recomputed, + * for example if CPT()_VQ()_DONE[DONE] >= CPT()_VQ()_DONE_WAIT[NUM_WAIT] + * and because the timer is zero, the interrupt will be resent immediately. + * (This covers the race case between software acknowledging an interrupt + * and a result returning.) + * * When CPT()_VQ()_DONE_ENA_W1S[DONE] = 0, interrupts are not sent, + * but the counting described above still occurs. + * Since CPT instructions complete out-of-order, if software is using + * completion interrupts the suggested scheme is to request a DONEINT on + * each request, and when an interrupt arrives perform a "greedy" scan for + * completions; even if a later command is acknowledged first this will + * not result in missing a completion. + * Software is responsible for making sure [DONE] does not overflow; + * for example by insuring there are not more than 2^20-1 instructions in + * flight that may request interrupts. + * + */ +union otx_cptx_vqx_done { + u64 u; + struct otx_cptx_vqx_done_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_20_63:44; + u64 done:20; +#else /* Word 0 - Little Endian */ + u64 done:20; + u64 reserved_20_63:44; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_wait + * + * CPT Queue Done Interrupt Coalescing Wait Registers + * Specifies the per queue interrupt coalescing settings. + * cptx_vqx_done_wait_s + * Word0 + * reserved_48_63:16 [63:48] Reserved. + * time_wait:16; [47:32](R/W) Time hold-off. When CPT()_VQ()_DONE[DONE] = 0 + * or CPT()_VQ()_DONE_ACK is written a timer is cleared. When the timer + * reaches [TIME_WAIT]*1024 then interrupt coalescing ends. + * see CPT()_VQ()_DONE[DONE]. If 0x0, time coalescing is disabled. + * reserved_20_31:12 [31:20] Reserved. + * num_wait:20 [19:0](R/W) Number of messages hold-off. + * When CPT()_VQ()_DONE[DONE] >= [NUM_WAIT] then interrupt coalescing ends + * see CPT()_VQ()_DONE[DONE]. If 0x0, same behavior as 0x1. + * + */ +union otx_cptx_vqx_done_wait { + u64 u; + struct otx_cptx_vqx_done_wait_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_48_63:16; + u64 time_wait:16; + u64 reserved_20_31:12; + u64 num_wait:20; +#else /* Word 0 - Little Endian */ + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_done_ena_w1s + * + * CPT Queue Done Interrupt Enable Set Registers + * Write 1 to these registers will enable the DONEINT interrupt for the queue. + * cptx_vqx_done_ena_w1s_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * done:1 [0:0](R/W1S/H) Write 1 will enable DONEINT for this queue. + * Write 0 has no effect. Read will return the enable bit. + */ +union otx_cptx_vqx_done_ena_w1s { + u64 u; + struct otx_cptx_vqx_done_ena_w1s_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 done:1; +#else /* Word 0 - Little Endian */ + u64 done:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Register (NCB) otx_cpt#_vq#_ctl + * + * CPT VF Queue Control Registers + * This register configures queues. This register should be changed (other than + * clearing [ENA]) only when quiescent (see CPT()_VQ()_INPROG[INFLIGHT]). + * cptx_vqx_ctl_s + * Word0 + * reserved_1_63:63 [63:1] Reserved. + * ena:1 [0:0](R/W/H) Enables the logical instruction queue. + * See also CPT()_PF_Q()_CTL[CONT_ERR] and CPT()_VQ()_INPROG[INFLIGHT]. + * 1 = Queue is enabled. + * 0 = Queue is disabled. + */ +union otx_cptx_vqx_ctl { + u64 u; + struct otx_cptx_vqx_ctl_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + u64 reserved_1_63:63; + u64 ena:1; +#else /* Word 0 - Little Endian */ + u64 ena:1; + u64 reserved_1_63:63; +#endif /* Word 0 - End */ + } s; +}; + +/* + * Error Address/Error Codes + * + * In the event of a severe error, microcode writes an 8-byte Error Code + * value (ECODE) to host memory at the Rptr address specified by the host + * system (in the 64-byte request). + * + * Word0 + * [63:56](R) 8-bit completion code + * [55:48](R) Number of the core that reported the severe error + * [47:0] Lower 6 bytes of M-Inst word2. Used to assist in uniquely + * identifying which specific instruction caused the error. This assumes + * that each instruction has a unique result location (RPTR), at least + * for a given period of time. + */ +union otx_cpt_error_code { + u64 u; + struct otx_cpt_error_code_s { +#if defined(__BIG_ENDIAN_BITFIELD) /* Word 0 - Big Endian */ + uint64_t ccode:8; + uint64_t coreid:8; + uint64_t rptr6:48; +#else /* Word 0 - Little Endian */ + uint64_t rptr6:48; + uint64_t coreid:8; + uint64_t ccode:8; +#endif /* Word 0 - End */ + } s; +}; + +#endif /*__OTX_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf.h b/drivers/crypto/marvell/octeontx/otx_cptpf.h new file mode 100644 index 0000000000..73cd0a9bc5 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTPF_H +#define __OTX_CPTPF_H + +#include <linux/types.h> +#include <linux/device.h> +#include "otx_cptpf_ucode.h" + +/* + * OcteonTX CPT device structure + */ +struct otx_cpt_device { + void __iomem *reg_base; /* Register start address */ + struct pci_dev *pdev; /* Pci device handle */ + struct otx_cpt_eng_grps eng_grps;/* Engine groups information */ + struct list_head list; + u8 pf_type; /* PF type SE or AE */ + u8 max_vfs; /* Maximum number of VFs supported by the CPT */ + u8 vfs_enabled; /* Number of enabled VFs */ +}; + +void otx_cpt_mbox_intr_handler(struct otx_cpt_device *cpt, int mbx); +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt); + +#endif /* __OTX_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_main.c b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c new file mode 100644 index 0000000000..14a42559f8 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_main.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +#define DRV_NAME "octeontx-cpt" +#define DRV_VERSION "1.0" + +static void otx_cpt_disable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Disable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1CX(0)); +} + +static void otx_cpt_enable_mbox_interrupts(struct otx_cpt_device *cpt) +{ + /* Enable mbox(0) interrupts for all VFs */ + writeq(~0ull, cpt->reg_base + OTX_CPT_PF_MBOX_ENA_W1SX(0)); +} + +static irqreturn_t otx_cpt_mbx0_intr_handler(int __always_unused irq, + void *cpt) +{ + otx_cpt_mbox_intr_handler(cpt, 0); + + return IRQ_HANDLED; +} + +static void otx_cpt_reset(struct otx_cpt_device *cpt) +{ + writeq(1, cpt->reg_base + OTX_CPT_PF_RESET); +} + +static void otx_cpt_find_max_enabled_cores(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_constants pf_cnsts = {0}; + + pf_cnsts.u = readq(cpt->reg_base + OTX_CPT_PF_CONSTANTS); + cpt->eng_grps.avail.max_se_cnt = pf_cnsts.s.se; + cpt->eng_grps.avail.max_ae_cnt = pf_cnsts.s.ae; +} + +static u32 otx_cpt_check_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_BIST_STATUS); + return bist_sts.u; +} + +static u64 otx_cpt_check_exe_bist_status(struct otx_cpt_device *cpt) +{ + union otx_cptx_pf_exe_bist_status bist_sts = {0}; + + bist_sts.u = readq(cpt->reg_base + OTX_CPT_PF_EXE_BIST_STATUS); + return bist_sts.u; +} + +static int otx_cpt_device_init(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u16 sdevid; + u64 bist; + + /* Reset the PF when probed first */ + otx_cpt_reset(cpt); + mdelay(100); + + pci_read_config_word(cpt->pdev, PCI_SUBSYSTEM_ID, &sdevid); + + /* Check BIST status */ + bist = (u64)otx_cpt_check_bist_status(cpt); + if (bist) { + dev_err(dev, "RAM BIST failed with code 0x%llx\n", bist); + return -ENODEV; + } + + bist = otx_cpt_check_exe_bist_status(cpt); + if (bist) { + dev_err(dev, "Engine BIST failed with code 0x%llx\n", bist); + return -ENODEV; + } + + /* Get max enabled cores */ + otx_cpt_find_max_enabled_cores(cpt); + + if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_se_cnt == 0)) { + cpt->pf_type = OTX_CPT_AE; + } else if ((sdevid == OTX_CPT_PCI_PF_SUBSYS_ID) && + (cpt->eng_grps.avail.max_ae_cnt == 0)) { + cpt->pf_type = OTX_CPT_SE; + } + + /* Get max VQs/VFs supported by the device */ + cpt->max_vfs = pci_sriov_get_totalvfs(cpt->pdev); + + /* Disable all cores */ + otx_cpt_disable_all_cores(cpt); + + return 0; +} + +static int otx_cpt_register_interrupts(struct otx_cpt_device *cpt) +{ + struct device *dev = &cpt->pdev->dev; + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + u32 num_vec = OTX_CPT_PF_MSIX_VECTORS; + int ret; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cpt->pdev, num_vec, num_vec, PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cpt->pdev->dev, + "Request for #%d msix vectors failed\n", + num_vec); + return ret; + } + + /* Register mailbox interrupt handlers */ + ret = request_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + otx_cpt_mbx0_intr_handler, 0, "CPT Mbox0", cpt); + if (ret) { + dev_err(dev, "Request irq failed\n"); + pci_free_irq_vectors(cpt->pdev); + return ret; + } + /* Enable mailbox interrupt */ + otx_cpt_enable_mbox_interrupts(cpt); + return 0; +} + +static void otx_cpt_unregister_interrupts(struct otx_cpt_device *cpt) +{ + u32 mbox_int_idx = OTX_CPT_PF_MBOX_INT; + + otx_cpt_disable_mbox_interrupts(cpt); + free_irq(pci_irq_vector(cpt->pdev, + OTX_CPT_PF_INT_VEC_E_MBOXX(mbox_int_idx, 0)), + cpt); + pci_free_irq_vectors(cpt->pdev); +} + + +static int otx_cpt_sriov_configure(struct pci_dev *pdev, int numvfs) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + int ret = 0; + + if (numvfs > cpt->max_vfs) + numvfs = cpt->max_vfs; + + if (numvfs > 0) { + ret = otx_cpt_try_create_default_eng_grps(cpt->pdev, + &cpt->eng_grps, + cpt->pf_type); + if (ret) + return ret; + + cpt->vfs_enabled = numvfs; + ret = pci_enable_sriov(pdev, numvfs); + if (ret) { + cpt->vfs_enabled = 0; + return ret; + } + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, true); + try_module_get(THIS_MODULE); + ret = numvfs; + } else { + pci_disable_sriov(pdev); + otx_cpt_set_eng_grps_is_rdonly(&cpt->eng_grps, false); + module_put(THIS_MODULE); + cpt->vfs_enabled = 0; + } + dev_notice(&cpt->pdev->dev, "VFs enabled: %d\n", ret); + + return ret; +} + +static int otx_cpt_probe(struct pci_dev *pdev, + const struct pci_device_id __always_unused *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cpt_device *cpt; + int err; + + cpt = devm_kzalloc(dev, sizeof(*cpt), GFP_KERNEL); + if (!cpt) + return -ENOMEM; + + pci_set_drvdata(pdev, cpt); + cpt->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto err_clear_drvdata; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable 48-bit DMA configuration\n"); + goto err_release_regions; + } + + /* MAP PF's configuration registers */ + cpt->reg_base = pci_iomap(pdev, OTX_CPT_PF_PCI_CFG_BAR, 0); + if (!cpt->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + /* CPT device HW initialization */ + err = otx_cpt_device_init(cpt); + if (err) + goto err_unmap_region; + + /* Register interrupts */ + err = otx_cpt_register_interrupts(cpt); + if (err) + goto err_unmap_region; + + /* Initialize engine groups */ + err = otx_cpt_init_eng_grps(pdev, &cpt->eng_grps, cpt->pf_type); + if (err) + goto err_unregister_interrupts; + + return 0; + +err_unregister_interrupts: + otx_cpt_unregister_interrupts(cpt); +err_unmap_region: + pci_iounmap(pdev, cpt->reg_base); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); +err_clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cpt_remove(struct pci_dev *pdev) +{ + struct otx_cpt_device *cpt = pci_get_drvdata(pdev); + + if (!cpt) + return; + + /* Disable VFs */ + pci_disable_sriov(pdev); + /* Cleanup engine groups */ + otx_cpt_cleanup_eng_grps(pdev, &cpt->eng_grps); + /* Disable CPT PF interrupts */ + otx_cpt_unregister_interrupts(cpt); + /* Disengage SE and AE cores from all groups */ + otx_cpt_disable_all_cores(cpt); + pci_iounmap(pdev, cpt->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx_cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX_CPT_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cpt_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cpt_id_table, + .probe = otx_cpt_probe, + .remove = otx_cpt_remove, + .sriov_configure = otx_cpt_sriov_configure +}; + +module_pci_driver(otx_cpt_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cpt_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c new file mode 100644 index 0000000000..a9e3de6587 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_mbox.c @@ -0,0 +1,253 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cpt_common.h" +#include "otx_cptpf.h" + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX opcode %s received from VF%d raw_data %s\n", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX opcode %s received from PF raw_data %s\n", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void otx_cpt_send_msg_to_vf(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(0) causes interrupt */ + writeq(mbx->data, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + writeq(mbx->msg, cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); +} + +/* + * ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void otx_cpt_mbox_send_ack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_ACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +/* NACKs VF's mailbox message that PF is not able to complete the action */ +static void otx_cptpf_mbox_send_nack(struct otx_cpt_device *cpt, int vf, + struct otx_cpt_mbox *mbx) +{ + mbx->data = 0ull; + mbx->msg = OTX_CPT_MSG_NACK; + otx_cpt_send_msg_to_vf(cpt, vf, mbx); +} + +static void otx_cpt_clear_mbox_intr(struct otx_cpt_device *cpt, u32 vf) +{ + /* W1C for the VF */ + writeq(1ull << vf, cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); +} + +/* + * Configure QLEN/Chunk sizes for VF + */ +static void otx_cpt_cfg_qlen_for_vf(struct otx_cpt_device *cpt, int vf, + u32 size) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.size = size; + pf_qx_ctl.s.cont_err = true; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +/* + * Configure VQ priority + */ +static void otx_cpt_cfg_vq_priority(struct otx_cpt_device *cpt, int vf, u32 pri) +{ + union otx_cptx_pf_qx_ctl pf_qx_ctl; + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); + pf_qx_ctl.s.pri = pri; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(vf)); +} + +static int otx_cpt_bind_vq_to_grp(struct otx_cpt_device *cpt, u8 q, u8 grp) +{ + struct device *dev = &cpt->pdev->dev; + struct otx_cpt_eng_grp_info *eng_grp; + union otx_cptx_pf_qx_ctl pf_qx_ctl; + struct otx_cpt_ucode *ucode; + + if (q >= cpt->max_vfs) { + dev_err(dev, "Requested queue %d is > than maximum avail %d\n", + q, cpt->max_vfs); + return -EINVAL; + } + + if (grp >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Requested group %d is > than maximum avail %d\n", + grp, OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + eng_grp = &cpt->eng_grps.grp[grp]; + if (!eng_grp->is_enabled) { + dev_err(dev, "Requested engine group %d is disabled\n", grp); + return -EINVAL; + } + + pf_qx_ctl.u = readq(cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + pf_qx_ctl.s.grp = grp; + writeq(pf_qx_ctl.u, cpt->reg_base + OTX_CPT_PF_QX_CTL(q)); + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + + if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_SE_TYPES)) + return OTX_CPT_SE_TYPES; + else if (otx_cpt_uc_supports_eng_type(ucode, OTX_CPT_AE_TYPES)) + return OTX_CPT_AE_TYPES; + else + return BAD_OTX_CPTVF_TYPE; +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void otx_cpt_handle_mbox_intr(struct otx_cpt_device *cpt, int vf) +{ + int vftype = 0; + struct otx_cpt_mbox mbx = {}; + struct device *dev = &cpt->pdev->dev; + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 0)); + mbx.data = readq(cpt->reg_base + OTX_CPT_PF_VFX_MBOXX(vf, 1)); + + dump_mbox_msg(&mbx, vf); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + mbx.msg = OTX_CPT_MSG_VF_UP; + mbx.data = cpt->vfs_enabled; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_READY: + mbx.msg = OTX_CPT_MSG_READY; + mbx.data = vf; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VF_DOWN: + /* First msg in VF teardown sequence */ + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QLEN: + otx_cpt_cfg_qlen_for_vf(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_QBIND_GRP: + vftype = otx_cpt_bind_vq_to_grp(cpt, vf, (u8)mbx.data); + if ((vftype != OTX_CPT_AE_TYPES) && + (vftype != OTX_CPT_SE_TYPES)) { + dev_err(dev, "VF%d binding to eng group %llu failed\n", + vf, mbx.data); + otx_cptpf_mbox_send_nack(cpt, vf, &mbx); + } else { + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + mbx.data = vftype; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + } + break; + case OTX_CPT_MSG_PF_TYPE: + mbx.msg = OTX_CPT_MSG_PF_TYPE; + mbx.data = cpt->pf_type; + otx_cpt_send_msg_to_vf(cpt, vf, &mbx); + break; + case OTX_CPT_MSG_VQ_PRIORITY: + otx_cpt_cfg_vq_priority(cpt, vf, mbx.data); + otx_cpt_mbox_send_ack(cpt, vf, &mbx); + break; + default: + dev_err(&cpt->pdev->dev, "Invalid msg from VF%d, msg 0x%llx\n", + vf, mbx.msg); + break; + } +} + +void otx_cpt_mbox_intr_handler (struct otx_cpt_device *cpt, int mbx) +{ + u64 intr; + u8 vf; + + intr = readq(cpt->reg_base + OTX_CPT_PF_MBOX_INTX(0)); + pr_debug("PF interrupt mbox%d mask 0x%llx\n", mbx, intr); + for (vf = 0; vf < cpt->max_vfs; vf++) { + if (intr & (1ULL << vf)) { + otx_cpt_handle_mbox_intr(cpt, vf); + otx_cpt_clear_mbox_intr(cpt, vf); + } + } +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c new file mode 100644 index 0000000000..c4250e5fcf --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c @@ -0,0 +1,1694 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ctype.h> +#include <linux/firmware.h> +#include "otx_cpt_common.h" +#include "otx_cptpf_ucode.h" +#include "otx_cptpf.h" + +#define CSR_DELAY 30 +/* Tar archive defines */ +#define TAR_MAGIC "ustar" +#define TAR_MAGIC_LEN 6 +#define TAR_BLOCK_LEN 512 +#define REGTYPE '0' +#define AREGTYPE '\0' + +/* tar header as defined in POSIX 1003.1-1990. */ +struct tar_hdr_t { + char name[100]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[12]; + char chksum[8]; + char typeflag; + char linkname[100]; + char magic[6]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[155]; +}; + +struct tar_blk_t { + union { + struct tar_hdr_t hdr; + char block[TAR_BLOCK_LEN]; + }; +}; + +struct tar_arch_info_t { + struct list_head ucodes; + const struct firmware *fw; +}; + +static struct otx_cpt_bitmap get_cores_bmap(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_bitmap bmap = { {0} }; + bool found = false; + int i; + + if (eng_grp->g->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(dev, "unsupported number of engines %d on octeontx\n", + eng_grp->g->engs_num); + return bmap; + } + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (eng_grp->engs[i].type) { + bitmap_or(bmap.bits, bmap.bits, + eng_grp->engs[i].bmap, + eng_grp->g->engs_num); + bmap.size = eng_grp->g->engs_num; + found = true; + } + } + + if (!found) + dev_err(dev, "No engines reserved for engine group %d\n", + eng_grp->idx); + return bmap; +} + +static int is_eng_type(int val, int eng_type) +{ + return val & (1 << eng_type); +} + +static int dev_supports_eng_type(struct otx_cpt_eng_grps *eng_grps, + int eng_type) +{ + return is_eng_type(eng_grps->eng_types_supported, eng_type); +} + +static void set_ucode_filename(struct otx_cpt_ucode *ucode, + const char *filename) +{ + strscpy(ucode->filename, filename, OTX_CPT_UCODE_NAME_LENGTH); +} + +static char *get_eng_type_str(int eng_type) +{ + char *str = "unknown"; + + switch (eng_type) { + case OTX_CPT_SE_TYPES: + str = "SE"; + break; + + case OTX_CPT_AE_TYPES: + str = "AE"; + break; + } + return str; +} + +static char *get_ucode_type_str(int ucode_type) +{ + char *str = "unknown"; + + switch (ucode_type) { + case (1 << OTX_CPT_SE_TYPES): + str = "SE"; + break; + + case (1 << OTX_CPT_AE_TYPES): + str = "AE"; + break; + } + return str; +} + +static int get_ucode_type(struct otx_cpt_ucode_hdr *ucode_hdr, int *ucode_type) +{ + char tmp_ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + u32 i, val = 0; + u8 nn; + + strscpy(tmp_ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + for (i = 0; i < strlen(tmp_ver_str); i++) + tmp_ver_str[i] = tolower(tmp_ver_str[i]); + + nn = ucode_hdr->ver_num.nn; + if (strnstr(tmp_ver_str, "se-", OTX_CPT_UCODE_VER_STR_SZ) && + (nn == OTX_CPT_SE_UC_TYPE1 || nn == OTX_CPT_SE_UC_TYPE2 || + nn == OTX_CPT_SE_UC_TYPE3)) + val |= 1 << OTX_CPT_SE_TYPES; + if (strnstr(tmp_ver_str, "ae", OTX_CPT_UCODE_VER_STR_SZ) && + nn == OTX_CPT_AE_UC_TYPE) + val |= 1 << OTX_CPT_AE_TYPES; + + *ucode_type = val; + + if (!val) + return -EINVAL; + if (is_eng_type(val, OTX_CPT_AE_TYPES) && + is_eng_type(val, OTX_CPT_SE_TYPES)) + return -EINVAL; + return 0; +} + +static int is_mem_zero(const char *ptr, int size) +{ + int i; + + for (i = 0; i < size; i++) { + if (ptr[i]) + return 0; + } + return 1; +} + +static int cpt_set_ucode_base(struct otx_cpt_eng_grp_info *eng_grp, void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + dma_addr_t dma_addr; + struct otx_cpt_bitmap bmap; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + if (eng_grp->mirror.is_ena) + dma_addr = + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].align_dma; + else + dma_addr = eng_grp->ucode[0].align_dma; + + /* + * Set UCODE_BASE only for the cores which are not used, + * other cores should have already valid UCODE_BASE set + */ + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + writeq((u64) dma_addr, cpt->reg_base + + OTX_CPT_PF_ENGX_UCODE_BASE(i)); + return 0; +} + +static int cpt_detach_and_disable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap = { {0} }; + int timeout = 10; + int i, busy; + u64 reg; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Detach the cores from group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (reg & (1ull << i)) { + eng_grp->g->eng_ref_cnt[i]--; + reg &= ~(1ull << i); + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + for_each_set_bit(i, bmap.bits, bmap.size) + if (reg & (1ull << i)) { + busy = 1; + break; + } + } while (busy); + + /* Disable the cores only if they are not used anymore */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + if (!eng_grp->g->eng_ref_cnt[i]) + reg &= ~(1ull << i); + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int cpt_attach_and_enable_cores(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx_cpt_device *cpt = (struct otx_cpt_device *) obj; + struct otx_cpt_bitmap bmap; + u64 reg; + int i; + + bmap = get_cores_bmap(&cpt->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + /* Attach the cores to the group */ + reg = readq(cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + for_each_set_bit(i, bmap.bits, bmap.size) { + if (!(reg & (1ull << i))) { + eng_grp->g->eng_ref_cnt[i]++; + reg |= 1ull << i; + } + } + writeq(reg, cpt->reg_base + OTX_CPT_PF_GX_EN(eng_grp->idx)); + + /* Enable the cores */ + reg = readq(cpt->reg_base + OTX_CPT_PF_EXE_CTL); + for_each_set_bit(i, bmap.bits, bmap.size) + reg |= 1ull << i; + writeq(reg, cpt->reg_base + OTX_CPT_PF_EXE_CTL); + + return 0; +} + +static int process_tar_file(struct device *dev, + struct tar_arch_info_t *tar_arch, char *filename, + const u8 *data, u32 size) +{ + struct tar_ucode_info_t *tar_info; + struct otx_cpt_ucode_hdr *ucode_hdr; + int ucode_type, ucode_size; + unsigned int code_length; + + /* + * If size is less than microcode header size then don't report + * an error because it might not be microcode file, just process + * next file from archive + */ + if (size < sizeof(struct otx_cpt_ucode_hdr)) + return 0; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) data; + /* + * If microcode version can't be found don't report an error + * because it might not be microcode file, just process next file + */ + if (get_ucode_type(ucode_hdr, &ucode_type)) + return 0; + + code_length = ntohl(ucode_hdr->code_length); + if (code_length >= INT_MAX / 2) { + dev_err(dev, "Invalid code_length %u\n", code_length); + return -EINVAL; + } + + ucode_size = code_length * 2; + if (!ucode_size || (size < round_up(ucode_size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size\n", filename); + return -EINVAL; + } + + tar_info = kzalloc(sizeof(struct tar_ucode_info_t), GFP_KERNEL); + if (!tar_info) + return -ENOMEM; + + tar_info->ucode_ptr = data; + set_ucode_filename(&tar_info->ucode, filename); + memcpy(tar_info->ucode.ver_str, ucode_hdr->ver_str, + OTX_CPT_UCODE_VER_STR_SZ); + tar_info->ucode.ver_num = ucode_hdr->ver_num; + tar_info->ucode.type = ucode_type; + tar_info->ucode.size = ucode_size; + list_add_tail(&tar_info->list, &tar_arch->ucodes); + + return 0; +} + +static void release_tar_archive(struct tar_arch_info_t *tar_arch) +{ + struct tar_ucode_info_t *curr, *temp; + + if (!tar_arch) + return; + + list_for_each_entry_safe(curr, temp, &tar_arch->ucodes, list) { + list_del(&curr->list); + kfree(curr); + } + + release_firmware(tar_arch->fw); + kfree(tar_arch); +} + +static struct tar_ucode_info_t *get_uc_from_tar_archive( + struct tar_arch_info_t *tar_arch, + int ucode_type) +{ + struct tar_ucode_info_t *curr, *uc_found = NULL; + + list_for_each_entry(curr, &tar_arch->ucodes, list) { + if (!is_eng_type(curr->ucode.type, ucode_type)) + continue; + + if (!uc_found) { + uc_found = curr; + continue; + } + + switch (ucode_type) { + case OTX_CPT_AE_TYPES: + break; + + case OTX_CPT_SE_TYPES: + if (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE2 || + (uc_found->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE3 + && curr->ucode.ver_num.nn == OTX_CPT_SE_UC_TYPE1)) + uc_found = curr; + break; + } + } + + return uc_found; +} + +static void print_tar_dbg_info(struct tar_arch_info_t *tar_arch, + char *tar_filename) +{ + struct tar_ucode_info_t *curr; + + pr_debug("Tar archive filename %s\n", tar_filename); + pr_debug("Tar archive pointer %p, size %ld\n", tar_arch->fw->data, + tar_arch->fw->size); + list_for_each_entry(curr, &tar_arch->ucodes, list) { + pr_debug("Ucode filename %s\n", curr->ucode.filename); + pr_debug("Ucode version string %s\n", curr->ucode.ver_str); + pr_debug("Ucode version %d.%d.%d.%d\n", + curr->ucode.ver_num.nn, curr->ucode.ver_num.xx, + curr->ucode.ver_num.yy, curr->ucode.ver_num.zz); + pr_debug("Ucode type (%d) %s\n", curr->ucode.type, + get_ucode_type_str(curr->ucode.type)); + pr_debug("Ucode size %d\n", curr->ucode.size); + pr_debug("Ucode ptr %p\n", curr->ucode_ptr); + } +} + +static struct tar_arch_info_t *load_tar_archive(struct device *dev, + char *tar_filename) +{ + struct tar_arch_info_t *tar_arch = NULL; + struct tar_blk_t *tar_blk; + unsigned int cur_size; + size_t tar_offs = 0; + size_t tar_size; + int ret; + + tar_arch = kzalloc(sizeof(struct tar_arch_info_t), GFP_KERNEL); + if (!tar_arch) + return NULL; + + INIT_LIST_HEAD(&tar_arch->ucodes); + + /* Load tar archive */ + ret = request_firmware(&tar_arch->fw, tar_filename, dev); + if (ret) + goto release_tar_arch; + + if (tar_arch->fw->size < TAR_BLOCK_LEN) { + dev_err(dev, "Invalid tar archive %s\n", tar_filename); + goto release_tar_arch; + } + + tar_size = tar_arch->fw->size; + tar_blk = (struct tar_blk_t *) tar_arch->fw->data; + if (strncmp(tar_blk->hdr.magic, TAR_MAGIC, TAR_MAGIC_LEN - 1)) { + dev_err(dev, "Unsupported format of tar archive %s\n", + tar_filename); + goto release_tar_arch; + } + + while (1) { + /* Read current file size */ + ret = kstrtouint(tar_blk->hdr.size, 8, &cur_size); + if (ret) + goto release_tar_arch; + + if (tar_offs + cur_size > tar_size || + tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s\n", tar_filename); + goto release_tar_arch; + } + + tar_offs += TAR_BLOCK_LEN; + if (tar_blk->hdr.typeflag == REGTYPE || + tar_blk->hdr.typeflag == AREGTYPE) { + ret = process_tar_file(dev, tar_arch, + tar_blk->hdr.name, + &tar_arch->fw->data[tar_offs], + cur_size); + if (ret) + goto release_tar_arch; + } + + tar_offs += (cur_size/TAR_BLOCK_LEN) * TAR_BLOCK_LEN; + if (cur_size % TAR_BLOCK_LEN) + tar_offs += TAR_BLOCK_LEN; + + /* Check for the end of the archive */ + if (tar_offs + 2*TAR_BLOCK_LEN > tar_size) { + dev_err(dev, "Invalid tar archive %s\n", tar_filename); + goto release_tar_arch; + } + + if (is_mem_zero(&tar_arch->fw->data[tar_offs], + 2*TAR_BLOCK_LEN)) + break; + + /* Read next block from tar archive */ + tar_blk = (struct tar_blk_t *) &tar_arch->fw->data[tar_offs]; + } + + print_tar_dbg_info(tar_arch, tar_filename); + return tar_arch; +release_tar_arch: + release_tar_archive(tar_arch); + return NULL; +} + +static struct otx_cpt_engs_rsvd *find_engines_by_type( + struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + if (eng_grp->engs[i].type == eng_type) + return &eng_grp->engs[i]; + } + return NULL; +} + +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type) +{ + return is_eng_type(ucode->type, eng_type); +} +EXPORT_SYMBOL_GPL(otx_cpt_uc_supports_eng_type); + +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + struct otx_cpt_engs_rsvd *engs; + + engs = find_engines_by_type(eng_grp, eng_type); + + return (engs != NULL ? 1 : 0); +} +EXPORT_SYMBOL_GPL(otx_cpt_eng_grp_has_eng_type); + +static void print_ucode_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size) +{ + if (eng_grp->mirror.is_ena) { + scnprintf(buf, size, "%s (shared with engine_group%d)", + eng_grp->g->grp[eng_grp->mirror.idx].ucode[0].ver_str, + eng_grp->mirror.idx); + } else { + scnprintf(buf, size, "%s", eng_grp->ucode[0].ver_str); + } +} + +static void print_engs_info(struct otx_cpt_eng_grp_info *eng_grp, + char *buf, int size, int idx) +{ + struct otx_cpt_engs_rsvd *mirrored_engs = NULL; + struct otx_cpt_engs_rsvd *engs; + int len, i; + + buf[0] = '\0'; + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (idx != -1 && idx != i) + continue; + + if (eng_grp->mirror.is_ena) + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + if (i > 0 && idx == -1) { + len = strlen(buf); + scnprintf(buf+len, size-len, ", "); + } + + len = strlen(buf); + scnprintf(buf+len, size-len, "%d %s ", mirrored_engs ? + engs->count + mirrored_engs->count : engs->count, + get_eng_type_str(engs->type)); + if (mirrored_engs) { + len = strlen(buf); + scnprintf(buf+len, size-len, + "(%d shared with engine_group%d) ", + engs->count <= 0 ? engs->count + + mirrored_engs->count : mirrored_engs->count, + eng_grp->mirror.idx); + } + } +} + +static void print_ucode_dbg_info(struct otx_cpt_ucode *ucode) +{ + pr_debug("Ucode info\n"); + pr_debug("Ucode version string %s\n", ucode->ver_str); + pr_debug("Ucode version %d.%d.%d.%d\n", ucode->ver_num.nn, + ucode->ver_num.xx, ucode->ver_num.yy, ucode->ver_num.zz); + pr_debug("Ucode type %s\n", get_ucode_type_str(ucode->type)); + pr_debug("Ucode size %d\n", ucode->size); + pr_debug("Ucode virt address %16.16llx\n", (u64)ucode->align_va); + pr_debug("Ucode phys address %16.16llx\n", ucode->align_dma); +} + +static void cpt_print_engines_mask(struct otx_cpt_eng_grp_info *eng_grp, + struct device *dev, char *buf, int size) +{ + struct otx_cpt_bitmap bmap; + u32 mask[2]; + + bmap = get_cores_bmap(dev, eng_grp); + if (!bmap.size) { + scnprintf(buf, size, "unknown"); + return; + } + bitmap_to_arr32(mask, bmap.bits, bmap.size); + scnprintf(buf, size, "%8.8x %8.8x", mask[1], mask[0]); +} + + +static void print_dbg_info(struct device *dev, + struct otx_cpt_eng_grps *eng_grps) +{ + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *mirrored_grp; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *grp; + struct otx_cpt_engs_rsvd *engs; + u32 mask[4]; + int i, j; + + pr_debug("Engine groups global info\n"); + pr_debug("max SE %d, max AE %d\n", + eng_grps->avail.max_se_cnt, eng_grps->avail.max_ae_cnt); + pr_debug("free SE %d\n", eng_grps->avail.se_cnt); + pr_debug("free AE %d\n", eng_grps->avail.ae_cnt); + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + pr_debug("engine_group%d, state %s\n", i, grp->is_enabled ? + "enabled" : "disabled"); + if (grp->is_enabled) { + mirrored_grp = &eng_grps->grp[grp->mirror.idx]; + pr_debug("Ucode0 filename %s, version %s\n", + grp->mirror.is_ena ? + mirrored_grp->ucode[0].filename : + grp->ucode[0].filename, + grp->mirror.is_ena ? + mirrored_grp->ucode[0].ver_str : + grp->ucode[0].ver_str); + } + + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + engs = &grp->engs[j]; + if (engs->type) { + print_engs_info(grp, engs_info, + 2*OTX_CPT_UCODE_NAME_LENGTH, j); + pr_debug("Slot%d: %s\n", j, engs_info); + bitmap_to_arr32(mask, engs->bmap, + eng_grps->engs_num); + pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x\n", + mask[3], mask[2], mask[1], mask[0]); + } else + pr_debug("Slot%d not used\n", j); + } + if (grp->is_enabled) { + cpt_print_engines_mask(grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + pr_debug("Cmask: %s\n", engs_mask); + } + } +} + +static int update_engines_avail_count(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs, int val) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + avail->se_cnt += val; + break; + + case OTX_CPT_AE_TYPES: + avail->ae_cnt += val; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int update_engines_offset(struct device *dev, + struct otx_cpt_engs_available *avail, + struct otx_cpt_engs_rsvd *engs) +{ + switch (engs->type) { + case OTX_CPT_SE_TYPES: + engs->offset = 0; + break; + + case OTX_CPT_AE_TYPES: + engs->offset = avail->max_se_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + return 0; +} + +static int release_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp) +{ + int i, ret = 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) + continue; + + if (grp->engs[i].count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, + &grp->engs[i], + grp->engs[i].count); + if (ret) + return ret; + } + + grp->engs[i].type = 0; + grp->engs[i].count = 0; + grp->engs[i].offset = 0; + grp->engs[i].ucode = NULL; + bitmap_zero(grp->engs[i].bmap, grp->g->engs_num); + } + + return 0; +} + +static int do_reserve_engines(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs) +{ + struct otx_cpt_engs_rsvd *engs = NULL; + int i, ret; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) { + engs = &grp->engs[i]; + break; + } + } + + if (!engs) + return -ENOMEM; + + engs->type = req_engs->type; + engs->count = req_engs->count; + + ret = update_engines_offset(dev, &grp->g->avail, engs); + if (ret) + return ret; + + if (engs->count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, engs, + -engs->count); + if (ret) + return ret; + } + + return 0; +} + +static int check_engines_availability(struct device *dev, + struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_eng) +{ + int avail_cnt = 0; + + switch (req_eng->type) { + case OTX_CPT_SE_TYPES: + avail_cnt = grp->g->avail.se_cnt; + break; + + case OTX_CPT_AE_TYPES: + avail_cnt = grp->g->avail.ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", req_eng->type); + return -EINVAL; + } + + if (avail_cnt < req_eng->count) { + dev_err(dev, + "Error available %s engines %d < than requested %d\n", + get_eng_type_str(req_eng->type), + avail_cnt, req_eng->count); + return -EBUSY; + } + + return 0; +} + +static int reserve_engines(struct device *dev, struct otx_cpt_eng_grp_info *grp, + struct otx_cpt_engines *req_engs, int req_cnt) +{ + int i, ret; + + /* Validate if a number of requested engines is available */ + for (i = 0; i < req_cnt; i++) { + ret = check_engines_availability(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + + /* Reserve requested engines for this engine group */ + for (i = 0; i < req_cnt; i++) { + ret = do_reserve_engines(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + return 0; +} + +static ssize_t eng_grp_info_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + char ucode_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_info[2*OTX_CPT_UCODE_NAME_LENGTH]; + char engs_mask[OTX_CPT_UCODE_NAME_LENGTH]; + struct otx_cpt_eng_grp_info *eng_grp; + int ret; + + eng_grp = container_of(attr, struct otx_cpt_eng_grp_info, info_attr); + mutex_lock(&eng_grp->g->lock); + + print_engs_info(eng_grp, engs_info, 2*OTX_CPT_UCODE_NAME_LENGTH, -1); + print_ucode_info(eng_grp, ucode_info, 2*OTX_CPT_UCODE_NAME_LENGTH); + cpt_print_engines_mask(eng_grp, dev, engs_mask, + OTX_CPT_UCODE_NAME_LENGTH); + ret = scnprintf(buf, PAGE_SIZE, + "Microcode : %s\nEngines: %s\nEngines mask: %s\n", + ucode_info, engs_info, engs_mask); + + mutex_unlock(&eng_grp->g->lock); + return ret; +} + +static int create_sysfs_eng_grps_info(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + eng_grp->info_attr.show = eng_grp_info_show; + eng_grp->info_attr.store = NULL; + eng_grp->info_attr.attr.name = eng_grp->sysfs_info_name; + eng_grp->info_attr.attr.mode = 0440; + sysfs_attr_init(&eng_grp->info_attr.attr); + return device_create_file(dev, &eng_grp->info_attr); +} + +static void ucode_unload(struct device *dev, struct otx_cpt_ucode *ucode) +{ + if (ucode->va) { + dma_free_coherent(dev, ucode->size + OTX_CPT_UCODE_ALIGNMENT, + ucode->va, ucode->dma); + ucode->va = NULL; + ucode->align_va = NULL; + ucode->dma = 0; + ucode->align_dma = 0; + ucode->size = 0; + } + + memset(&ucode->ver_str, 0, OTX_CPT_UCODE_VER_STR_SZ); + memset(&ucode->ver_num, 0, sizeof(struct otx_cpt_ucode_ver_num)); + set_ucode_filename(ucode, ""); + ucode->type = 0; +} + +static int copy_ucode_to_dma_mem(struct device *dev, + struct otx_cpt_ucode *ucode, + const u8 *ucode_data) +{ + u32 i; + + /* Allocate DMAable space */ + ucode->va = dma_alloc_coherent(dev, ucode->size + + OTX_CPT_UCODE_ALIGNMENT, + &ucode->dma, GFP_KERNEL); + if (!ucode->va) { + dev_err(dev, "Unable to allocate space for microcode\n"); + return -ENOMEM; + } + ucode->align_va = PTR_ALIGN(ucode->va, OTX_CPT_UCODE_ALIGNMENT); + ucode->align_dma = PTR_ALIGN(ucode->dma, OTX_CPT_UCODE_ALIGNMENT); + + memcpy((void *) ucode->align_va, (void *) ucode_data + + sizeof(struct otx_cpt_ucode_hdr), ucode->size); + + /* Byte swap 64-bit */ + for (i = 0; i < (ucode->size / 8); i++) + ((__be64 *)ucode->align_va)[i] = + cpu_to_be64(((u64 *)ucode->align_va)[i]); + /* Ucode needs 16-bit swap */ + for (i = 0; i < (ucode->size / 2); i++) + ((__be16 *)ucode->align_va)[i] = + cpu_to_be16(((u16 *)ucode->align_va)[i]); + return 0; +} + +static int ucode_load(struct device *dev, struct otx_cpt_ucode *ucode, + const char *ucode_filename) +{ + struct otx_cpt_ucode_hdr *ucode_hdr; + const struct firmware *fw; + unsigned int code_length; + int ret; + + set_ucode_filename(ucode, ucode_filename); + ret = request_firmware(&fw, ucode->filename, dev); + if (ret) + return ret; + + ucode_hdr = (struct otx_cpt_ucode_hdr *) fw->data; + memcpy(ucode->ver_str, ucode_hdr->ver_str, OTX_CPT_UCODE_VER_STR_SZ); + ucode->ver_num = ucode_hdr->ver_num; + code_length = ntohl(ucode_hdr->code_length); + if (code_length >= INT_MAX / 2) { + dev_err(dev, "Ucode invalid code_length %u\n", code_length); + ret = -EINVAL; + goto release_fw; + } + ucode->size = code_length * 2; + if (!ucode->size || (fw->size < round_up(ucode->size, 16) + + sizeof(struct otx_cpt_ucode_hdr) + OTX_CPT_UCODE_SIGN_LEN)) { + dev_err(dev, "Ucode %s invalid size\n", ucode_filename); + ret = -EINVAL; + goto release_fw; + } + + ret = get_ucode_type(ucode_hdr, &ucode->type); + if (ret) { + dev_err(dev, "Microcode %s unknown type 0x%x\n", + ucode->filename, ucode->type); + goto release_fw; + } + + ret = copy_ucode_to_dma_mem(dev, ucode, fw->data); + if (ret) + goto release_fw; + + print_ucode_dbg_info(ucode); +release_fw: + release_firmware(fw); + return ret; +} + +static int enable_eng_grp(struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int ret; + + ret = cpt_set_ucode_base(eng_grp, obj); + if (ret) + return ret; + + ret = cpt_attach_and_enable_cores(eng_grp, obj); + return ret; +} + +static int disable_eng_grp(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int i, ret; + + ret = cpt_detach_and_disable_cores(eng_grp, obj); + if (ret) + return ret; + + /* Unload ucode used by this engine group */ + ucode_unload(dev, &eng_grp->ucode[0]); + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + eng_grp->engs[i].ucode = &eng_grp->ucode[0]; + } + + ret = cpt_set_ucode_base(eng_grp, obj); + + return ret; +} + +static void setup_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp, + struct otx_cpt_eng_grp_info *src_grp) +{ + /* Setup fields for engine group which is mirrored */ + src_grp->mirror.is_ena = false; + src_grp->mirror.idx = 0; + src_grp->mirror.ref_count++; + + /* Setup fields for mirroring engine group */ + dst_grp->mirror.is_ena = true; + dst_grp->mirror.idx = src_grp->idx; + dst_grp->mirror.ref_count = 0; +} + +static void remove_eng_grp_mirroring(struct otx_cpt_eng_grp_info *dst_grp) +{ + struct otx_cpt_eng_grp_info *src_grp; + + if (!dst_grp->mirror.is_ena) + return; + + src_grp = &dst_grp->g->grp[dst_grp->mirror.idx]; + + src_grp->mirror.ref_count--; + dst_grp->mirror.is_ena = false; + dst_grp->mirror.idx = 0; + dst_grp->mirror.ref_count = 0; +} + +static void update_requested_engs(struct otx_cpt_eng_grp_info *mirrored_eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + struct otx_cpt_engs_rsvd *mirrored_engs; + int i; + + for (i = 0; i < engs_cnt; i++) { + mirrored_engs = find_engines_by_type(mirrored_eng_grp, + engs[i].type); + if (!mirrored_engs) + continue; + + /* + * If mirrored group has this type of engines attached then + * there are 3 scenarios possible: + * 1) mirrored_engs.count == engs[i].count then all engines + * from mirrored engine group will be shared with this engine + * group + * 2) mirrored_engs.count > engs[i].count then only a subset of + * engines from mirrored engine group will be shared with this + * engine group + * 3) mirrored_engs.count < engs[i].count then all engines + * from mirrored engine group will be shared with this group + * and additional engines will be reserved for exclusively use + * by this engine group + */ + engs[i].count -= mirrored_engs->count; + } +} + +static struct otx_cpt_eng_grp_info *find_mirrored_eng_grp( + struct otx_cpt_eng_grp_info *grp) +{ + struct otx_cpt_eng_grps *eng_grps = grp->g; + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + continue; + if (eng_grps->grp[i].ucode[0].type) + continue; + if (grp->idx == i) + continue; + if (!strncasecmp(eng_grps->grp[i].ucode[0].ver_str, + grp->ucode[0].ver_str, + OTX_CPT_UCODE_VER_STR_SZ)) + return &eng_grps->grp[i]; + } + + return NULL; +} + +static struct otx_cpt_eng_grp_info *find_unused_eng_grp( + struct otx_cpt_eng_grps *eng_grps) +{ + int i; + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + return &eng_grps->grp[i]; + } + return NULL; +} + +static int eng_grp_update_masks(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_engs_rsvd *engs, *mirrored_engs; + struct otx_cpt_bitmap tmp_bmap = { {0} }; + int i, j, cnt, max_cnt; + int bit; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (engs->count <= 0) + continue; + + switch (engs->type) { + case OTX_CPT_SE_TYPES: + max_cnt = eng_grp->g->avail.max_se_cnt; + break; + + case OTX_CPT_AE_TYPES: + max_cnt = eng_grp->g->avail.max_ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + cnt = engs->count; + WARN_ON(engs->offset + max_cnt > OTX_CPT_MAX_ENGINES); + bitmap_zero(tmp_bmap.bits, eng_grp->g->engs_num); + for (j = engs->offset; j < engs->offset + max_cnt; j++) { + if (!eng_grp->g->eng_ref_cnt[j]) { + bitmap_set(tmp_bmap.bits, j, 1); + cnt--; + if (!cnt) + break; + } + } + + if (cnt) + return -ENOSPC; + + bitmap_copy(engs->bmap, tmp_bmap.bits, eng_grp->g->engs_num); + } + + if (!eng_grp->mirror.is_ena) + return 0; + + for (i = 0; i < OTX_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + WARN_ON(!mirrored_engs && engs->count <= 0); + if (!mirrored_engs) + continue; + + bitmap_copy(tmp_bmap.bits, mirrored_engs->bmap, + eng_grp->g->engs_num); + if (engs->count < 0) { + bit = find_first_bit(mirrored_engs->bmap, + eng_grp->g->engs_num); + bitmap_clear(tmp_bmap.bits, bit, -engs->count); + } + bitmap_or(engs->bmap, engs->bmap, tmp_bmap.bits, + eng_grp->g->engs_num); + } + return 0; +} + +static int delete_engine_group(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp) +{ + int i, ret; + + if (!eng_grp->is_enabled) + return -EINVAL; + + if (eng_grp->mirror.ref_count) { + dev_err(dev, "Can't delete engine_group%d as it is used by engine_group(s):", + eng_grp->idx); + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + if (eng_grp->g->grp[i].mirror.is_ena && + eng_grp->g->grp[i].mirror.idx == eng_grp->idx) + pr_cont(" %d", i); + } + pr_cont("\n"); + return -EINVAL; + } + + /* Removing engine group mirroring if enabled */ + remove_eng_grp_mirroring(eng_grp); + + /* Disable engine group */ + ret = disable_eng_grp(dev, eng_grp, eng_grp->g->obj); + if (ret) + return ret; + + /* Release all engines held by this engine group */ + ret = release_engines(dev, eng_grp); + if (ret) + return ret; + + device_remove_file(dev, &eng_grp->info_attr); + eng_grp->is_enabled = false; + + return 0; +} + +static int validate_1_ucode_scenario(struct device *dev, + struct otx_cpt_eng_grp_info *eng_grp, + struct otx_cpt_engines *engs, int engs_cnt) +{ + int i; + + /* Verify that ucode loaded supports requested engine types */ + for (i = 0; i < engs_cnt; i++) { + if (!otx_cpt_uc_supports_eng_type(&eng_grp->ucode[0], + engs[i].type)) { + dev_err(dev, + "Microcode %s does not support %s engines\n", + eng_grp->ucode[0].filename, + get_eng_type_str(engs[i].type)); + return -EINVAL; + } + } + return 0; +} + +static void update_ucode_ptrs(struct otx_cpt_eng_grp_info *eng_grp) +{ + struct otx_cpt_ucode *ucode; + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + WARN_ON(!eng_grp->engs[0].type); + eng_grp->engs[0].ucode = ucode; +} + +static int create_engine_group(struct device *dev, + struct otx_cpt_eng_grps *eng_grps, + struct otx_cpt_engines *engs, int engs_cnt, + void *ucode_data[], int ucodes_cnt, + bool use_uc_from_tar_arch) +{ + struct otx_cpt_eng_grp_info *mirrored_eng_grp; + struct tar_ucode_info_t *tar_info; + struct otx_cpt_eng_grp_info *eng_grp; + int i, ret = 0; + + if (ucodes_cnt > OTX_CPT_MAX_ETYPES_PER_GRP) + return -EINVAL; + + /* Validate if requested engine types are supported by this device */ + for (i = 0; i < engs_cnt; i++) + if (!dev_supports_eng_type(eng_grps, engs[i].type)) { + dev_err(dev, "Device does not support %s engines\n", + get_eng_type_str(engs[i].type)); + return -EPERM; + } + + /* Find engine group which is not used */ + eng_grp = find_unused_eng_grp(eng_grps); + if (!eng_grp) { + dev_err(dev, "Error all engine groups are being used\n"); + return -ENOSPC; + } + + /* Load ucode */ + for (i = 0; i < ucodes_cnt; i++) { + if (use_uc_from_tar_arch) { + tar_info = (struct tar_ucode_info_t *) ucode_data[i]; + eng_grp->ucode[i] = tar_info->ucode; + ret = copy_ucode_to_dma_mem(dev, &eng_grp->ucode[i], + tar_info->ucode_ptr); + } else + ret = ucode_load(dev, &eng_grp->ucode[i], + (char *) ucode_data[i]); + if (ret) + goto err_ucode_unload; + } + + /* Validate scenario where 1 ucode is used */ + ret = validate_1_ucode_scenario(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Check if this group mirrors another existing engine group */ + mirrored_eng_grp = find_mirrored_eng_grp(eng_grp); + if (mirrored_eng_grp) { + /* Setup mirroring */ + setup_eng_grp_mirroring(eng_grp, mirrored_eng_grp); + + /* + * Update count of requested engines because some + * of them might be shared with mirrored group + */ + update_requested_engs(mirrored_eng_grp, engs, engs_cnt); + } + + /* Reserve engines */ + ret = reserve_engines(dev, eng_grp, engs, engs_cnt); + if (ret) + goto err_ucode_unload; + + /* Update ucode pointers used by engines */ + update_ucode_ptrs(eng_grp); + + /* Update engine masks used by this group */ + ret = eng_grp_update_masks(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Create sysfs entry for engine group info */ + ret = create_sysfs_eng_grps_info(dev, eng_grp); + if (ret) + goto err_release_engs; + + /* Enable engine group */ + ret = enable_eng_grp(eng_grp, eng_grps->obj); + if (ret) + goto err_release_engs; + + /* + * If this engine group mirrors another engine group + * then we need to unload ucode as we will use ucode + * from mirrored engine group + */ + if (eng_grp->mirror.is_ena) + ucode_unload(dev, &eng_grp->ucode[0]); + + eng_grp->is_enabled = true; + if (eng_grp->mirror.is_ena) + dev_info(dev, + "Engine_group%d: reuse microcode %s from group %d\n", + eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str, + mirrored_eng_grp->idx); + else + dev_info(dev, "Engine_group%d: microcode loaded %s\n", + eng_grp->idx, eng_grp->ucode[0].ver_str); + + return 0; + +err_release_engs: + release_engines(dev, eng_grp); +err_ucode_unload: + ucode_unload(dev, &eng_grp->ucode[0]); + return ret; +} + +static ssize_t ucode_load_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + char *ucode_filename[OTX_CPT_MAX_ETYPES_PER_GRP]; + char tmp_buf[OTX_CPT_UCODE_NAME_LENGTH] = { 0 }; + char *start, *val, *err_msg, *tmp; + struct otx_cpt_eng_grps *eng_grps; + int grp_idx = 0, ret = -EINVAL; + bool has_se, has_ie, has_ae; + int del_grp_idx = -1; + int ucode_idx = 0; + + if (strlen(buf) > OTX_CPT_UCODE_NAME_LENGTH) + return -EINVAL; + + eng_grps = container_of(attr, struct otx_cpt_eng_grps, ucode_load_attr); + err_msg = "Invalid engine group format"; + strscpy(tmp_buf, buf, OTX_CPT_UCODE_NAME_LENGTH); + start = tmp_buf; + + has_se = has_ie = has_ae = false; + + for (;;) { + val = strsep(&start, ";"); + if (!val) + break; + val = strim(val); + if (!*val) + continue; + + if (!strncasecmp(val, "engine_group", 12)) { + if (del_grp_idx != -1) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 13) + goto err_print; + if (kstrtoint((tmp + 12), 10, &del_grp_idx)) + goto err_print; + val = strim(val); + if (strncasecmp(val, "null", 4)) + goto err_print; + if (strlen(val) != 4) + goto err_print; + } else if (!strncasecmp(val, "se", 2) && strchr(val, ':')) { + if (has_se || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_SE_TYPES; + has_se = true; + } else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) { + if (has_ae || ucode_idx) + goto err_print; + tmp = strim(strsep(&val, ":")); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX_CPT_AE_TYPES; + has_ae = true; + } else { + if (ucode_idx > 1) + goto err_print; + if (!strlen(val)) + goto err_print; + if (strnstr(val, " ", strlen(val))) + goto err_print; + ucode_filename[ucode_idx++] = val; + } + } + + /* Validate input parameters */ + if (del_grp_idx == -1) { + if (!(grp_idx && ucode_idx)) + goto err_print; + + if (ucode_idx > 1 && grp_idx < 2) + goto err_print; + + if (grp_idx > OTX_CPT_MAX_ETYPES_PER_GRP) { + err_msg = "Error max 2 engine types can be attached"; + goto err_print; + } + + } else { + if (del_grp_idx < 0 || + del_grp_idx >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Invalid engine group index %d\n", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (!eng_grps->grp[del_grp_idx].is_enabled) { + dev_err(dev, "Error engine_group%d is not configured\n", + del_grp_idx); + ret = -EINVAL; + return ret; + } + + if (grp_idx || ucode_idx) + goto err_print; + } + + mutex_lock(&eng_grps->lock); + + if (eng_grps->is_rdonly) { + dev_err(dev, "Disable VFs before modifying engine groups\n"); + ret = -EACCES; + goto err_unlock; + } + + if (del_grp_idx == -1) + /* create engine group */ + ret = create_engine_group(dev, eng_grps, engs, grp_idx, + (void **) ucode_filename, + ucode_idx, false); + else + /* delete engine group */ + ret = delete_engine_group(dev, &eng_grps->grp[del_grp_idx]); + if (ret) + goto err_unlock; + + print_dbg_info(dev, eng_grps); +err_unlock: + mutex_unlock(&eng_grps->lock); + return ret ? ret : count; +err_print: + dev_err(dev, "%s\n", err_msg); + + return ret; +} + +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type) +{ + struct tar_ucode_info_t *tar_info[OTX_CPT_MAX_ETYPES_PER_GRP] = {}; + struct otx_cpt_engines engs[OTX_CPT_MAX_ETYPES_PER_GRP] = {}; + struct tar_arch_info_t *tar_arch = NULL; + char *tar_filename; + int i, ret = 0; + + mutex_lock(&eng_grps->lock); + + /* + * We don't create engine group for kernel crypto if attempt to create + * it was already made (when user enabled VFs for the first time) + */ + if (eng_grps->is_first_try) + goto unlock_mutex; + eng_grps->is_first_try = true; + + /* We create group for kcrypto only if no groups are configured */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].is_enabled) + goto unlock_mutex; + + switch (pf_type) { + case OTX_CPT_AE: + case OTX_CPT_SE: + tar_filename = OTX_CPT_UCODE_TAR_FILE_NAME; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto unlock_mutex; + } + + tar_arch = load_tar_archive(&pdev->dev, tar_filename); + if (!tar_arch) + goto unlock_mutex; + + /* + * If device supports SE engines and there is SE microcode in tar + * archive try to create engine group with SE engines for kernel + * crypto functionality (symmetric crypto) + */ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_SE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_SE_TYPES)) { + + engs[0].type = OTX_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + /* + * If device supports AE engines and there is AE microcode in tar + * archive try to create engine group with AE engines for asymmetric + * crypto functionality. + */ + tar_info[0] = get_uc_from_tar_archive(tar_arch, OTX_CPT_AE_TYPES); + if (tar_info[0] && + dev_supports_eng_type(eng_grps, OTX_CPT_AE_TYPES)) { + + engs[0].type = OTX_CPT_AE_TYPES; + engs[0].count = eng_grps->avail.max_ae_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) tar_info, 1, true); + if (ret) + goto release_tar_arch; + } + + print_dbg_info(&pdev->dev, eng_grps); +release_tar_arch: + release_tar_archive(tar_arch); +unlock_mutex: + mutex_unlock(&eng_grps->lock); + return ret; +} + +void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps, + bool is_rdonly) +{ + mutex_lock(&eng_grps->lock); + + eng_grps->is_rdonly = is_rdonly; + + mutex_unlock(&eng_grps->lock); +} + +void otx_cpt_disable_all_cores(struct otx_cpt_device *cpt) +{ + int grp, timeout = 100; + u64 reg; + + /* Disengage the cores from groups */ + for (grp = 0; grp < OTX_CPT_MAX_ENGINE_GROUPS; grp++) { + writeq(0, cpt->reg_base + OTX_CPT_PF_GX_EN(grp)); + udelay(CSR_DELAY); + } + + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + while (reg) { + udelay(CSR_DELAY); + reg = readq(cpt->reg_base + OTX_CPT_PF_EXEC_BUSY); + if (timeout--) { + dev_warn(&cpt->pdev->dev, "Cores still busy\n"); + break; + } + } + + /* Disable the cores */ + writeq(0, cpt->reg_base + OTX_CPT_PF_EXE_CTL); +} + +void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps) +{ + struct otx_cpt_eng_grp_info *grp; + int i, j; + + mutex_lock(&eng_grps->lock); + if (eng_grps->is_ucode_load_created) { + device_remove_file(&pdev->dev, + &eng_grps->ucode_load_attr); + eng_grps->is_ucode_load_created = false; + } + + /* First delete all mirroring engine groups */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].mirror.is_ena) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Delete remaining engine groups */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Release memory */ + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + kfree(grp->engs[j].bmap); + grp->engs[j].bmap = NULL; + } + } + + mutex_unlock(&eng_grps->lock); +} + +int otx_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, int pf_type) +{ + struct otx_cpt_eng_grp_info *grp; + int i, j, ret = 0; + + mutex_init(&eng_grps->lock); + eng_grps->obj = pci_get_drvdata(pdev); + eng_grps->avail.se_cnt = eng_grps->avail.max_se_cnt; + eng_grps->avail.ae_cnt = eng_grps->avail.max_ae_cnt; + + eng_grps->engs_num = eng_grps->avail.max_se_cnt + + eng_grps->avail.max_ae_cnt; + if (eng_grps->engs_num > OTX_CPT_MAX_ENGINES) { + dev_err(&pdev->dev, + "Number of engines %d > than max supported %d\n", + eng_grps->engs_num, OTX_CPT_MAX_ENGINES); + ret = -EINVAL; + goto err; + } + + for (i = 0; i < OTX_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + grp->g = eng_grps; + grp->idx = i; + + snprintf(grp->sysfs_info_name, OTX_CPT_UCODE_NAME_LENGTH, + "engine_group%d", i); + for (j = 0; j < OTX_CPT_MAX_ETYPES_PER_GRP; j++) { + grp->engs[j].bmap = + kcalloc(BITS_TO_LONGS(eng_grps->engs_num), + sizeof(long), GFP_KERNEL); + if (!grp->engs[j].bmap) { + ret = -ENOMEM; + goto err; + } + } + } + + switch (pf_type) { + case OTX_CPT_SE: + /* OcteonTX 83XX SE CPT PF has only SE engines attached */ + eng_grps->eng_types_supported = 1 << OTX_CPT_SE_TYPES; + break; + + case OTX_CPT_AE: + /* OcteonTX 83XX AE CPT PF has only AE engines attached */ + eng_grps->eng_types_supported = 1 << OTX_CPT_AE_TYPES; + break; + + default: + dev_err(&pdev->dev, "Unknown PF type %d\n", pf_type); + ret = -EINVAL; + goto err; + } + + eng_grps->ucode_load_attr.show = NULL; + eng_grps->ucode_load_attr.store = ucode_load_store; + eng_grps->ucode_load_attr.attr.name = "ucode_load"; + eng_grps->ucode_load_attr.attr.mode = 0220; + sysfs_attr_init(&eng_grps->ucode_load_attr.attr); + ret = device_create_file(&pdev->dev, + &eng_grps->ucode_load_attr); + if (ret) + goto err; + eng_grps->is_ucode_load_created = true; + + print_dbg_info(&pdev->dev, eng_grps); + return ret; +err: + otx_cpt_cleanup_eng_grps(pdev, eng_grps); + return ret; +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h new file mode 100644 index 0000000000..8620ac87a4 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.h @@ -0,0 +1,180 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTPF_UCODE_H +#define __OTX_CPTPF_UCODE_H + +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/module.h> +#include "otx_cpt_hw_types.h" + +/* CPT ucode name maximum length */ +#define OTX_CPT_UCODE_NAME_LENGTH 64 +/* + * On OcteonTX 83xx platform, only one type of engines is allowed to be + * attached to an engine group. + */ +#define OTX_CPT_MAX_ETYPES_PER_GRP 1 + +/* Default tar archive file names */ +#define OTX_CPT_UCODE_TAR_FILE_NAME "cpt8x-mc.tar" + +/* CPT ucode alignment */ +#define OTX_CPT_UCODE_ALIGNMENT 128 + +/* CPT ucode signature size */ +#define OTX_CPT_UCODE_SIGN_LEN 256 + +/* Microcode version string length */ +#define OTX_CPT_UCODE_VER_STR_SZ 44 + +/* Maximum number of supported engines/cores on OcteonTX 83XX platform */ +#define OTX_CPT_MAX_ENGINES 64 + +#define OTX_CPT_ENGS_BITMASK_LEN (OTX_CPT_MAX_ENGINES/(BITS_PER_BYTE * \ + sizeof(unsigned long))) + +/* Microcode types */ +enum otx_cpt_ucode_type { + OTX_CPT_AE_UC_TYPE = 1, /* AE-MAIN */ + OTX_CPT_SE_UC_TYPE1 = 20, /* SE-MAIN - combination of 21 and 22 */ + OTX_CPT_SE_UC_TYPE2 = 21, /* Fast Path IPSec + AirCrypto */ + OTX_CPT_SE_UC_TYPE3 = 22, /* + * Hash + HMAC + FlexiCrypto + RNG + Full + * Feature IPSec + AirCrypto + Kasumi + */ +}; + +struct otx_cpt_bitmap { + unsigned long bits[OTX_CPT_ENGS_BITMASK_LEN]; + int size; +}; + +struct otx_cpt_engines { + int type; + int count; +}; + +/* Microcode version number */ +struct otx_cpt_ucode_ver_num { + u8 nn; + u8 xx; + u8 yy; + u8 zz; +}; + +struct otx_cpt_ucode_hdr { + struct otx_cpt_ucode_ver_num ver_num; + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ]; + __be32 code_length; + u32 padding[3]; +}; + +struct otx_cpt_ucode { + u8 ver_str[OTX_CPT_UCODE_VER_STR_SZ];/* + * ucode version in readable format + */ + struct otx_cpt_ucode_ver_num ver_num;/* ucode version number */ + char filename[OTX_CPT_UCODE_NAME_LENGTH]; /* ucode filename */ + dma_addr_t dma; /* phys address of ucode image */ + dma_addr_t align_dma; /* aligned phys address of ucode image */ + void *va; /* virt address of ucode image */ + void *align_va; /* aligned virt address of ucode image */ + u32 size; /* ucode image size */ + int type; /* ucode image type SE or AE */ +}; + +struct tar_ucode_info_t { + struct list_head list; + struct otx_cpt_ucode ucode;/* microcode information */ + const u8 *ucode_ptr; /* pointer to microcode in tar archive */ +}; + +/* Maximum and current number of engines available for all engine groups */ +struct otx_cpt_engs_available { + int max_se_cnt; + int max_ae_cnt; + int se_cnt; + int ae_cnt; +}; + +/* Engines reserved to an engine group */ +struct otx_cpt_engs_rsvd { + int type; /* engine type */ + int count; /* number of engines attached */ + int offset; /* constant offset of engine type in the bitmap */ + unsigned long *bmap; /* attached engines bitmap */ + struct otx_cpt_ucode *ucode; /* ucode used by these engines */ +}; + +struct otx_cpt_mirror_info { + int is_ena; /* + * is mirroring enabled, it is set only for engine + * group which mirrors another engine group + */ + int idx; /* + * index of engine group which is mirrored by this + * group, set only for engine group which mirrors + * another group + */ + int ref_count; /* + * number of times this engine group is mirrored by + * other groups, this is set only for engine group + * which is mirrored by other group(s) + */ +}; + +struct otx_cpt_eng_grp_info { + struct otx_cpt_eng_grps *g; /* pointer to engine_groups structure */ + struct device_attribute info_attr; /* group info entry attr */ + /* engines attached */ + struct otx_cpt_engs_rsvd engs[OTX_CPT_MAX_ETYPES_PER_GRP]; + /* Microcode information */ + struct otx_cpt_ucode ucode[OTX_CPT_MAX_ETYPES_PER_GRP]; + /* sysfs info entry name */ + char sysfs_info_name[OTX_CPT_UCODE_NAME_LENGTH]; + /* engine group mirroring information */ + struct otx_cpt_mirror_info mirror; + int idx; /* engine group index */ + bool is_enabled; /* + * is engine group enabled, engine group is enabled + * when it has engines attached and ucode loaded + */ +}; + +struct otx_cpt_eng_grps { + struct otx_cpt_eng_grp_info grp[OTX_CPT_MAX_ENGINE_GROUPS]; + struct device_attribute ucode_load_attr;/* ucode load attr */ + struct otx_cpt_engs_available avail; + struct mutex lock; + void *obj; + int engs_num; /* total number of engines supported */ + int eng_types_supported; /* engine types supported SE, AE */ + u8 eng_ref_cnt[OTX_CPT_MAX_ENGINES];/* engines reference count */ + bool is_ucode_load_created; /* is ucode_load sysfs entry created */ + bool is_first_try; /* is this first try to create kcrypto engine grp */ + bool is_rdonly; /* do engine groups configuration can be modified */ +}; + +int otx_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, int pf_type); +void otx_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps); +int otx_cpt_try_create_default_eng_grps(struct pci_dev *pdev, + struct otx_cpt_eng_grps *eng_grps, + int pf_type); +void otx_cpt_set_eng_grps_is_rdonly(struct otx_cpt_eng_grps *eng_grps, + bool is_rdonly); +int otx_cpt_uc_supports_eng_type(struct otx_cpt_ucode *ucode, int eng_type); +int otx_cpt_eng_grp_has_eng_type(struct otx_cpt_eng_grp_info *eng_grp, + int eng_type); + +#endif /* __OTX_CPTPF_UCODE_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf.h b/drivers/crypto/marvell/octeontx/otx_cptvf.h new file mode 100644 index 0000000000..dd02f21659 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTVF_H +#define __OTX_CPTVF_H + +#include <linux/list.h> +#include <linux/interrupt.h> +#include <linux/device.h> +#include "otx_cpt_common.h" +#include "otx_cptvf_reqmgr.h" + +/* Flags to indicate the features supported */ +#define OTX_CPT_FLAG_DEVICE_READY BIT(1) +#define otx_cpt_device_ready(cpt) ((cpt)->flags & OTX_CPT_FLAG_DEVICE_READY) +/* Default command queue length */ +#define OTX_CPT_CMD_QLEN (4*2046) +#define OTX_CPT_CMD_QCHUNK_SIZE 1023 +#define OTX_CPT_NUM_QS_PER_VF 1 + +struct otx_cpt_cmd_chunk { + u8 *head; + dma_addr_t dma_addr; + u32 size; /* Chunk size, max OTX_CPT_INST_CHUNK_MAX_SIZE */ + struct list_head nextchunk; +}; + +struct otx_cpt_cmd_queue { + u32 idx; /* Command queue host write idx */ + u32 num_chunks; /* Number of command chunks */ + struct otx_cpt_cmd_chunk *qhead;/* + * Command queue head, instructions + * are inserted here + */ + struct otx_cpt_cmd_chunk *base; + struct list_head chead; +}; + +struct otx_cpt_cmd_qinfo { + u32 qchunksize; /* Command queue chunk size */ + struct otx_cpt_cmd_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cpt_pending_qinfo { + u32 num_queues; /* Number of queues supported */ + struct otx_cpt_pending_queue queue[OTX_CPT_NUM_QS_PER_VF]; +}; + +#define for_each_pending_queue(qinfo, q, i) \ + for (i = 0, q = &qinfo->queue[i]; i < qinfo->num_queues; i++, \ + q = &qinfo->queue[i]) + +struct otx_cptvf_wqe { + struct tasklet_struct twork; + struct otx_cptvf *cptvf; +}; + +struct otx_cptvf_wqe_info { + struct otx_cptvf_wqe vq_wqe[OTX_CPT_NUM_QS_PER_VF]; +}; + +struct otx_cptvf { + u16 flags; /* Flags to hold device status bits */ + u8 vfid; /* Device Index 0...OTX_CPT_MAX_VF_NUM */ + u8 num_vfs; /* Number of enabled VFs */ + u8 vftype; /* VF type of SE_TYPE(2) or AE_TYPE(1) */ + u8 vfgrp; /* VF group (0 - 8) */ + u8 node; /* Operating node: Bits (46:44) in BAR0 address */ + u8 priority; /* + * VF priority ring: 1-High proirity round + * robin ring;0-Low priority round robin ring; + */ + struct pci_dev *pdev; /* Pci device handle */ + void __iomem *reg_base; /* Register start address */ + void *wqe_info; /* BH worker info */ + /* MSI-X */ + cpumask_var_t affinity_mask[OTX_CPT_VF_MSIX_VECTORS]; + /* Command and Pending queues */ + u32 qsize; + u32 num_queues; + struct otx_cpt_cmd_qinfo cqinfo; /* Command queue information */ + struct otx_cpt_pending_qinfo pqinfo; /* Pending queue information */ + /* VF-PF mailbox communication */ + bool pf_acked; + bool pf_nacked; +}; + +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf); +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group); +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf); +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf); +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf); +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf); +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val); + +#endif /* __OTX_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c new file mode 100644 index 0000000000..1c2c870e88 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -0,0 +1,1743 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <crypto/aes.h> +#include <crypto/authenc.h> +#include <crypto/cryptd.h> +#include <crypto/des.h> +#include <crypto/internal/aead.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> +#include <crypto/xts.h> +#include <crypto/scatterwalk.h> +#include <linux/rtnetlink.h> +#include <linux/sort.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define CPT_MAX_VF_NUM 64 +/* Size of salt in AES GCM mode */ +#define AES_GCM_SALT_SIZE 4 +/* Size of IV in AES GCM mode */ +#define AES_GCM_IV_SIZE 8 +/* Size of ICV (Integrity Check Value) in AES GCM mode */ +#define AES_GCM_ICV_SIZE 16 +/* Offset of IV in AES GCM mode */ +#define AES_GCM_IV_OFFSET 8 +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == OTX_CPT_DMA_GATHER_SCATTER) ? (1 << 7) : 0) + +/* Truncated SHA digest size */ +#define SHA1_TRUNC_DIGEST_SIZE 12 +#define SHA256_TRUNC_DIGEST_SIZE 16 +#define SHA384_TRUNC_DIGEST_SIZE 24 +#define SHA512_TRUNC_DIGEST_SIZE 32 + +static DEFINE_MUTEX(mutex); +static int is_crypto_registered; + +struct cpt_device_desc { + enum otx_cptpf_type pf_type; + struct pci_dev *dev; + int num_queues; +}; + +struct cpt_device_table { + atomic_t count; + struct cpt_device_desc desc[CPT_MAX_VF_NUM]; +}; + +static struct cpt_device_table se_devices = { + .count = ATOMIC_INIT(0) +}; + +static struct cpt_device_table ae_devices = { + .count = ATOMIC_INIT(0) +}; + +static inline int get_se_device(struct pci_dev **pdev, int *cpu_num) +{ + int count, ret = 0; + + count = atomic_read(&se_devices.count); + if (count < 1) + return -ENODEV; + + *cpu_num = get_cpu(); + + if (se_devices.desc[0].pf_type == OTX_CPT_SE) { + /* + * On OcteonTX platform there is one CPT instruction queue bound + * to each VF. We get maximum performance if one CPT queue + * is available for each cpu otherwise CPT queues need to be + * shared between cpus. + */ + if (*cpu_num >= count) + *cpu_num %= count; + *pdev = se_devices.desc[*cpu_num].dev; + } else { + pr_err("Unknown PF type %d\n", se_devices.desc[0].pf_type); + ret = -EINVAL; + } + put_cpu(); + + return ret; +} + +static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req) +{ + struct otx_cpt_req_ctx *rctx; + struct aead_request *req; + struct crypto_aead *tfm; + + req = container_of(cpt_req->areq, struct aead_request, base); + tfm = crypto_aead_reqtfm(req); + rctx = aead_request_ctx_dma(req); + if (memcmp(rctx->fctx.hmac.s.hmac_calc, + rctx->fctx.hmac.s.hmac_recv, + crypto_aead_authsize(tfm)) != 0) + return -EBADMSG; + + return 0; +} + +static void otx_cpt_aead_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct otx_cpt_req_info *cpt_req; + struct pci_dev *pdev; + + if (!cpt_info) + goto complete; + + cpt_req = cpt_info->req; + if (!status) { + /* + * When selected cipher is NULL we need to manually + * verify whether calculated hmac value matches + * received hmac value + */ + if (cpt_req->req_type == OTX_CPT_AEAD_ENC_DEC_NULL_REQ && + !cpt_req->is_enc) + status = validate_hmac_cipher_null(cpt_req); + } + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + +complete: + if (areq) + crypto_request_complete(areq, status); +} + +static void output_iv_copyback(struct crypto_async_request *areq) +{ + struct otx_cpt_req_info *req_info; + struct skcipher_request *sreq; + struct crypto_skcipher *stfm; + struct otx_cpt_req_ctx *rctx; + struct otx_cpt_enc_ctx *ctx; + u32 start, ivsize; + + sreq = container_of(areq, struct skcipher_request, base); + stfm = crypto_skcipher_reqtfm(sreq); + ctx = crypto_skcipher_ctx(stfm); + if (ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) { + rctx = skcipher_request_ctx_dma(sreq); + req_info = &rctx->cpt_req; + ivsize = crypto_skcipher_ivsize(stfm); + start = sreq->cryptlen - ivsize; + + if (req_info->is_enc) { + scatterwalk_map_and_copy(sreq->iv, sreq->dst, start, + ivsize, 0); + } else { + if (sreq->src != sreq->dst) { + scatterwalk_map_and_copy(sreq->iv, sreq->src, + start, ivsize, 0); + } else { + memcpy(sreq->iv, req_info->iv_out, ivsize); + kfree(req_info->iv_out); + } + } + } +} + +static void otx_cpt_skcipher_callback(int status, void *arg1, void *arg2) +{ + struct otx_cpt_info_buffer *cpt_info = arg2; + struct crypto_async_request *areq = arg1; + struct pci_dev *pdev; + + if (areq) { + if (!status) + output_iv_copyback(areq); + if (cpt_info) { + pdev = cpt_info->pdev; + do_request_cleanup(pdev, cpt_info); + } + crypto_request_complete(areq, status); + } +} + +static inline void update_input_data(struct otx_cpt_req_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, inp_sg->length); + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + inp_sg = sg_next(inp_sg); + } +} + +static inline void update_output_data(struct otx_cpt_req_info *req_info, + struct scatterlist *outp_sg, + u32 offset, u32 nbytes, u32 *argcnt) +{ + req_info->rlen += nbytes; + + while (nbytes) { + u32 len = min(nbytes, outp_sg->length - offset); + u8 *ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *) (ptr + offset); + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + offset = 0; + outp_sg = sg_next(outp_sg); + } +} + +static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); + struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm); + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int ivsize = crypto_skcipher_ivsize(stfm); + u32 start = req->cryptlen - ivsize; + gfp_t flags; + + flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + } else { + req_info->req.opcode.s.minor = 3; + if ((ctx->cipher_type == OTX_CPT_AES_CBC || + ctx->cipher_type == OTX_CPT_DES3_CBC) && + req->src == req->dst) { + req_info->iv_out = kmalloc(ivsize, flags); + if (!req_info->iv_out) + return -ENOMEM; + + scatterwalk_map_and_copy(req_info->iv_out, req->src, + start, ivsize, 0); + } + } + /* Encryption data length */ + req_info->req.param1 = req->cryptlen; + /* Authentication data length */ + req_info->req.param2 = 0; + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + + if (ctx->cipher_type == OTX_CPT_AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + + memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm)); + + fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.cflags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + + ++(*argcnt); + + return 0; +} + +static inline u32 create_input_list(struct skcipher_request *req, u32 enc, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + int ret; + + ret = create_ctx_hdr(req, enc, &argcnt); + if (ret) + return ret; + + update_input_data(req_info, req->src, req->cryptlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline void create_output_list(struct skcipher_request *req, + u32 enc_iv_len) +{ + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* + * OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + update_output_data(req_info, req->dst, 0, req->cryptlen, &argcnt); + req_info->outcnt = argcnt; +} + +static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 enc_iv_len = crypto_skcipher_ivsize(stfm); + struct pci_dev *pdev; + int status, cpu_num; + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req->cryptlen > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + status = create_input_list(req, enc, enc_iv_len); + if (status) + return status; + create_output_list(req, enc_iv_len); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->callback = (void *)otx_cpt_skcipher_callback; + req_info->areq = &req->base; + req_info->req_type = OTX_CPT_ENC_DEC_REQ; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + req_info->ctrl.s.grp = 0; + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + status = otx_cpt_do_request(pdev, req_info, cpu_num); + + return status; +} + +static int otx_cpt_skcipher_encrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, true); +} + +static int otx_cpt_skcipher_decrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, false); +} + +static int otx_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + const u8 *key2 = key + (keylen / 2); + const u8 *key1 = key; + int ret; + + ret = xts_verify_key(tfm, key, keylen); + if (ret) + return ret; + ctx->key_len = keylen; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + ctx->cipher_type = OTX_CPT_AES_XTS; + switch (ctx->key_len) { + case 2 * AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case 2 * AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int cpt_des_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (keylen != DES3_EDE_KEY_SIZE) + return -EINVAL; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int cpt_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return 0; +} + +static int otx_cpt_skcipher_cbc_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CBC); +} + +static int otx_cpt_skcipher_ecb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_ECB); +} + +static int otx_cpt_skcipher_cfb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX_CPT_AES_CFB); +} + +static int otx_cpt_skcipher_cbc_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_CBC); +} + +static int otx_cpt_skcipher_ecb_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX_CPT_DES3_ECB); +} + +static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm) +{ + struct otx_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + memset(ctx, 0, sizeof(*ctx)); + /* + * Additional memory for skcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + crypto_skcipher_set_reqsize_dma( + tfm, sizeof(struct otx_cpt_req_ctx) + + sizeof(struct skcipher_request)); + + return 0; +} + +static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + + ctx->cipher_type = cipher_type; + ctx->mac_type = mac_type; + + /* + * When selected cipher is NULL we use HMAC opcode instead of + * FLEXICRYPTO opcode therefore we don't need to use HASH algorithms + * for calculating ipad and opad + */ + if (ctx->cipher_type != OTX_CPT_CIPHER_NULL) { + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + ctx->hashalg = crypto_alloc_shash("sha1", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA256: + ctx->hashalg = crypto_alloc_shash("sha256", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA384: + ctx->hashalg = crypto_alloc_shash("sha384", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX_CPT_SHA512: + ctx->hashalg = crypto_alloc_shash("sha512", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + } + } + + crypto_aead_set_reqsize_dma(tfm, sizeof(struct otx_cpt_req_ctx)); + + return 0; +} + +static int otx_cpt_aead_cbc_aes_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_cbc_aes_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_cbc_aes_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_cbc_aes_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_CBC, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_ecb_null_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA1); +} + +static int otx_cpt_aead_ecb_null_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA256); +} + +static int otx_cpt_aead_ecb_null_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA384); +} + +static int otx_cpt_aead_ecb_null_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_CIPHER_NULL, OTX_CPT_SHA512); +} + +static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX_CPT_AES_GCM, OTX_CPT_MAC_NULL); +} + +static void otx_cpt_aead_exit(struct crypto_aead *tfm) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + + kfree(ctx->ipad); + kfree(ctx->opad); + if (ctx->hashalg) + crypto_free_shash(ctx->hashalg); + kfree(ctx->sdesc); +} + +/* + * This is the Integrity Check Value validation (aka the authentication tag + * length) + */ +static int otx_cpt_aead_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + + switch (ctx->mac_type) { + case OTX_CPT_SHA1: + if (authsize != SHA1_DIGEST_SIZE && + authsize != SHA1_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA1_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA256: + if (authsize != SHA256_DIGEST_SIZE && + authsize != SHA256_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA256_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA384: + if (authsize != SHA384_DIGEST_SIZE && + authsize != SHA384_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA384_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_SHA512: + if (authsize != SHA512_DIGEST_SIZE && + authsize != SHA512_TRUNC_DIGEST_SIZE) + return -EINVAL; + + if (authsize == SHA512_TRUNC_DIGEST_SIZE) + ctx->is_trunc_hmac = true; + break; + + case OTX_CPT_MAC_NULL: + if (ctx->cipher_type == OTX_CPT_AES_GCM) { + if (authsize != AES_GCM_ICV_SIZE) + return -EINVAL; + } else + return -EINVAL; + break; + + default: + return -EINVAL; + } + + tfm->authsize = authsize; + return 0; +} + +static struct otx_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg) +{ + struct otx_cpt_sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return NULL; + + sdesc->shash.tfm = alg; + + return sdesc; +} + +static inline void swap_data32(void *buf, u32 len) +{ + cpu_to_be32_array(buf, buf, len / 4); +} + +static inline void swap_data64(void *buf, u32 len) +{ + __be64 *dst = buf; + u64 *src = buf; + int i = 0; + + for (i = 0 ; i < len / 8; i++, src++, dst++) + *dst = cpu_to_be64p(src); +} + +static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) +{ + struct sha512_state *sha512; + struct sha256_state *sha256; + struct sha1_state *sha1; + + switch (mac_type) { + case OTX_CPT_SHA1: + sha1 = (struct sha1_state *) in_pad; + swap_data32(sha1->state, SHA1_DIGEST_SIZE); + memcpy(out_pad, &sha1->state, SHA1_DIGEST_SIZE); + break; + + case OTX_CPT_SHA256: + sha256 = (struct sha256_state *) in_pad; + swap_data32(sha256->state, SHA256_DIGEST_SIZE); + memcpy(out_pad, &sha256->state, SHA256_DIGEST_SIZE); + break; + + case OTX_CPT_SHA384: + case OTX_CPT_SHA512: + sha512 = (struct sha512_state *) in_pad; + swap_data64(sha512->state, SHA512_DIGEST_SIZE); + memcpy(out_pad, &sha512->state, SHA512_DIGEST_SIZE); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int aead_hmac_init(struct crypto_aead *cipher) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); + int state_size = crypto_shash_statesize(ctx->hashalg); + int ds = crypto_shash_digestsize(ctx->hashalg); + int bs = crypto_shash_blocksize(ctx->hashalg); + int authkeylen = ctx->auth_key_len; + u8 *ipad = NULL, *opad = NULL; + int ret = 0, icount = 0; + + ctx->sdesc = alloc_sdesc(ctx->hashalg); + if (!ctx->sdesc) + return -ENOMEM; + + ctx->ipad = kzalloc(bs, GFP_KERNEL); + if (!ctx->ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + ctx->opad = kzalloc(bs, GFP_KERNEL); + if (!ctx->opad) { + ret = -ENOMEM; + goto calc_fail; + } + + ipad = kzalloc(state_size, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + opad = kzalloc(state_size, GFP_KERNEL); + if (!opad) { + ret = -ENOMEM; + goto calc_fail; + } + + if (authkeylen > bs) { + ret = crypto_shash_digest(&ctx->sdesc->shash, ctx->key, + authkeylen, ipad); + if (ret) + goto calc_fail; + + authkeylen = ds; + } else { + memcpy(ipad, ctx->key, authkeylen); + } + + memset(ipad + authkeylen, 0, bs - authkeylen); + memcpy(opad, ipad, bs); + + for (icount = 0; icount < bs; icount++) { + ipad[icount] ^= 0x36; + opad[icount] ^= 0x5c; + } + + /* + * Partial Hash calculated from the software + * algorithm is retrieved for IPAD & OPAD + */ + + /* IPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, ipad, bs); + crypto_shash_export(&ctx->sdesc->shash, ipad); + ret = copy_pad(ctx->mac_type, ctx->ipad, ipad); + if (ret) + goto calc_fail; + + /* OPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, opad, bs); + crypto_shash_export(&ctx->sdesc->shash, opad); + ret = copy_pad(ctx->mac_type, ctx->opad, opad); + if (ret) + goto calc_fail; + + kfree(ipad); + kfree(opad); + + return 0; + +calc_fail: + kfree(ctx->ipad); + ctx->ipad = NULL; + kfree(ctx->opad); + ctx->opad = NULL; + kfree(ipad); + kfree(opad); + kfree(ctx->sdesc); + ctx->sdesc = NULL; + + return ret; +} + +static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); + struct crypto_authenc_key_param *param; + int enckeylen = 0, authkeylen = 0; + struct rtattr *rta = (void *)key; + int status = -EINVAL; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (keylen < enckeylen) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + authkeylen = keylen - enckeylen; + memcpy(ctx->key, key, keylen); + + switch (enckeylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX_CPT_AES_256_BIT; + break; + default: + /* Invalid key length */ + goto badkey; + } + + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = authkeylen; + + status = aead_hmac_init(cipher); + if (status) + goto badkey; + + return 0; +badkey: + return status; +} + +static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); + struct crypto_authenc_key_param *param; + struct rtattr *rta = (void *)key; + int enckeylen = 0; + + if (!RTA_OK(rta, keylen)) + goto badkey; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + goto badkey; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + goto badkey; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (enckeylen != 0) + goto badkey; + + if (keylen > OTX_CPT_MAX_KEY_SIZE) + goto badkey; + + memcpy(ctx->key, key, keylen); + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = keylen; + return 0; +badkey: + return -EINVAL; +} + +static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); + + /* + * For aes gcm we expect to get encryption key (16, 24, 32 bytes) + * and salt (4 bytes) + */ + switch (keylen) { + case AES_KEYSIZE_128 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_128_BIT; + ctx->enc_key_len = AES_KEYSIZE_128; + break; + case AES_KEYSIZE_192 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_192_BIT; + ctx->enc_key_len = AES_KEYSIZE_192; + break; + case AES_KEYSIZE_256 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX_CPT_AES_256_BIT; + ctx->enc_key_len = AES_KEYSIZE_256; + break; + default: + /* Invalid key and salt length */ + return -EINVAL; + } + + /* Store encryption key and salt */ + memcpy(ctx->key, key, keylen); + + return 0; +} + +static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, + u32 *argcnt) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct otx_cpt_fc_ctx *fctx = &rctx->fctx; + int mac_len = crypto_aead_authsize(tfm); + int ds; + + rctx->ctrl_word.e.enc_data_offset = req->assoclen; + + switch (ctx->cipher_type) { + case OTX_CPT_AES_CBC: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_CPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key + ctx->auth_key_len, + ctx->enc_key_len); + /* Copy IV to context */ + memcpy(fctx->enc.encr_iv, req->iv, crypto_aead_ivsize(tfm)); + + ds = crypto_shash_digestsize(ctx->hashalg); + if (ctx->mac_type == OTX_CPT_SHA384) + ds = SHA512_DIGEST_SIZE; + if (ctx->ipad) + memcpy(fctx->hmac.e.ipad, ctx->ipad, ds); + if (ctx->opad) + memcpy(fctx->hmac.e.opad, ctx->opad, ds); + break; + + case OTX_CPT_AES_GCM: + fctx->enc.enc_ctrl.e.iv_source = OTX_CPT_FROM_DPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key, ctx->enc_key_len); + /* Copy salt to context */ + memcpy(fctx->enc.encr_iv, ctx->key + ctx->enc_key_len, + AES_GCM_SALT_SIZE); + + rctx->ctrl_word.e.iv_offset = req->assoclen - AES_GCM_IV_OFFSET; + break; + + default: + /* Unknown cipher type */ + return -EINVAL; + } + rctx->ctrl_word.flags = cpu_to_be64(rctx->ctrl_word.cflags); + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + if (enc) { + req_info->req.opcode.s.minor = 2; + req_info->req.param1 = req->cryptlen; + req_info->req.param2 = req->cryptlen + req->assoclen; + } else { + req_info->req.opcode.s.minor = 3; + req_info->req.param1 = req->cryptlen - mac_len; + req_info->req.param2 = req->cryptlen + req->assoclen - mac_len; + } + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type; + fctx->enc.enc_ctrl.e.mac_len = mac_len; + fctx->enc.enc_ctrl.flags = cpu_to_be64(fctx->enc.enc_ctrl.cflags); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx_cpt_fc_ctx); + ++(*argcnt); + + return 0; +} + +static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, + u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + + req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; + req_info->ctrl.s.se_req = OTX_CPT_SE_CORE_REQ; + req_info->req.opcode.s.major = OTX_CPT_MAJOR_OP_HMAC | + DMA_MODE_FLAG(OTX_CPT_DMA_GATHER_SCATTER); + req_info->is_trunc_hmac = ctx->is_trunc_hmac; + + req_info->req.opcode.s.minor = 0; + req_info->req.param1 = ctx->auth_key_len; + req_info->req.param2 = ctx->mac_type << 8; + + /* Add authentication key */ + req_info->in[*argcnt].vptr = ctx->key; + req_info->in[*argcnt].size = round_up(ctx->auth_key_len, 8); + req_info->req.dlen += round_up(ctx->auth_key_len, 8); + ++(*argcnt); + + return 0; +} + +static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen = req->cryptlen + req->assoclen; + u32 status, argcnt = 0; + + status = create_aead_ctx_hdr(req, enc, &argcnt); + if (status) + return status; + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, + u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0, outputlen = 0; + + if (enc) + outputlen = req->cryptlen + req->assoclen + mac_len; + else + outputlen = req->cryptlen + req->assoclen - mac_len; + + update_output_data(req_info, req->dst, 0, outputlen, &argcnt); + req_info->outcnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_input_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen, argcnt = 0; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + create_hmac_ctx_hdr(req, &argcnt, enc); + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->incnt = argcnt; + + return 0; +} + +static inline u32 create_aead_null_output_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct scatterlist *dst; + u8 *ptr = NULL; + int argcnt = 0, status, offset; + u32 inputlen; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + /* + * If source and destination are different + * then copy payload to destination + */ + if (req->src != req->dst) { + + ptr = kmalloc(inputlen, (req_info->areq->flags & + CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC); + if (!ptr) { + status = -ENOMEM; + goto error; + } + + status = sg_copy_to_buffer(req->src, sg_nents(req->src), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error_free; + } + status = sg_copy_from_buffer(req->dst, sg_nents(req->dst), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error_free; + } + kfree(ptr); + } + + if (enc) { + /* + * In an encryption scenario hmac needs + * to be appended after payload + */ + dst = req->dst; + offset = inputlen; + while (offset >= dst->length) { + offset -= dst->length; + dst = sg_next(dst); + if (!dst) { + status = -ENOENT; + goto error; + } + } + + update_output_data(req_info, dst, offset, mac_len, &argcnt); + } else { + /* + * In a decryption scenario calculated hmac for received + * payload needs to be compare with hmac received + */ + status = sg_copy_buffer(req->src, sg_nents(req->src), + rctx->fctx.hmac.s.hmac_recv, mac_len, + inputlen, true); + if (status != mac_len) { + status = -EINVAL; + goto error; + } + + req_info->out[argcnt].vptr = rctx->fctx.hmac.s.hmac_calc; + req_info->out[argcnt].size = mac_len; + argcnt++; + } + + req_info->outcnt = argcnt; + return 0; + +error_free: + kfree(ptr); +error: + return status; +} + +static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) +{ + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct pci_dev *pdev; + u32 status, cpu_num; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.flags = 0; + + req_info->callback = otx_cpt_aead_callback; + req_info->areq = &req->base; + req_info->req_type = reg_type; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + + switch (reg_type) { + case OTX_CPT_AEAD_ENC_DEC_REQ: + status = create_aead_input_list(req, enc); + if (status) + return status; + status = create_aead_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + case OTX_CPT_AEAD_ENC_DEC_NULL_REQ: + status = create_aead_null_input_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + status = create_aead_null_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + default: + return -EINVAL; + } + + /* Validate that request doesn't exceed maximum CPT supported size */ + if (req_info->req.param1 > OTX_CPT_MAX_REQ_SIZE || + req_info->req.param2 > OTX_CPT_MAX_REQ_SIZE) + return -E2BIG; + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->ctrl.s.grp = 0; + + status = otx_cpt_do_request(pdev, req_info, cpu_num); + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + return status; +} + +static int otx_cpt_aead_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, true); +} + +static int otx_cpt_aead_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_REQ, false); +} + +static int otx_cpt_aead_null_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, true); +} + +static int otx_cpt_aead_null_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX_CPT_AEAD_ENC_DEC_NULL_REQ, false); +} + +static struct skcipher_alg otx_cpt_skciphers[] = { { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "cpt_xts_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_xts_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cpt_cbc_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cbc_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "cpt_ecb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_ecb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cfb(aes)", + .base.cra_driver_name = "cpt_cfb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx_cpt_skcipher_cfb_aes_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cpt_cbc_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = otx_cpt_skcipher_cbc_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "cpt_ecb_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx_cpt_des3_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx_cpt_enc_dec_init, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + .setkey = otx_cpt_skcipher_ecb_des3_setkey, + .encrypt = otx_cpt_skcipher_encrypt, + .decrypt = otx_cpt_skcipher_decrypt, +} }; + +static struct aead_alg otx_cpt_aeads[] = { { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha1_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha256_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha384_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha512_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_cbc_aes_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha1),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha1_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha1_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha256_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha256_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha384_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha384_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha512_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_ecb_null_sha512_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_null_encrypt, + .decrypt = otx_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "cpt_rfc4106_gcm_aes", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx_cpt_aead_gcm_aes_init, + .exit = otx_cpt_aead_exit, + .setkey = otx_cpt_aead_gcm_aes_setkey, + .setauthsize = otx_cpt_aead_set_authsize, + .encrypt = otx_cpt_aead_encrypt, + .decrypt = otx_cpt_aead_decrypt, + .ivsize = AES_GCM_IV_SIZE, + .maxauthsize = AES_GCM_ICV_SIZE, +} }; + +static inline int is_any_alg_used(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++) + if (refcount_read(&otx_cpt_skciphers[i].base.cra_refcnt) != 1) + return true; + for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++) + if (refcount_read(&otx_cpt_aeads[i].base.cra_refcnt) != 1) + return true; + return false; +} + +static inline int cpt_register_algs(void) +{ + int i, err = 0; + + if (!IS_ENABLED(CONFIG_DM_CRYPT)) { + for (i = 0; i < ARRAY_SIZE(otx_cpt_skciphers); i++) + otx_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + if (err) + return err; + } + + for (i = 0; i < ARRAY_SIZE(otx_cpt_aeads); i++) + otx_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads)); + if (err) { + crypto_unregister_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + return err; + } + + return 0; +} + +static inline void cpt_unregister_algs(void) +{ + crypto_unregister_skciphers(otx_cpt_skciphers, + ARRAY_SIZE(otx_cpt_skciphers)); + crypto_unregister_aeads(otx_cpt_aeads, ARRAY_SIZE(otx_cpt_aeads)); +} + +static int compare_func(const void *lptr, const void *rptr) +{ + struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + + if (ldesc->dev->devfn < rdesc->dev->devfn) + return -1; + if (ldesc->dev->devfn > rdesc->dev->devfn) + return 1; + return 0; +} + +static void swap_func(void *lptr, void *rptr, int size) +{ + struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + + swap(*ldesc, *rdesc); +} + +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum otx_cptpf_type pf_type, + enum otx_cptvf_type engine_type, + int num_queues, int num_devices) +{ + int ret = 0; + int count; + + mutex_lock(&mutex); + switch (engine_type) { + case OTX_CPT_SE_TYPES: + count = atomic_read(&se_devices.count); + if (count >= CPT_MAX_VF_NUM) { + dev_err(&pdev->dev, "No space to add a new device\n"); + ret = -ENOSPC; + goto err; + } + se_devices.desc[count].pf_type = pf_type; + se_devices.desc[count].num_queues = num_queues; + se_devices.desc[count++].dev = pdev; + atomic_inc(&se_devices.count); + + if (atomic_read(&se_devices.count) == num_devices && + is_crypto_registered == false) { + if (cpt_register_algs()) { + dev_err(&pdev->dev, + "Error in registering crypto algorithms\n"); + ret = -EINVAL; + goto err; + } + try_module_get(mod); + is_crypto_registered = true; + } + sort(se_devices.desc, count, sizeof(struct cpt_device_desc), + compare_func, swap_func); + break; + + case OTX_CPT_AE_TYPES: + count = atomic_read(&ae_devices.count); + if (count >= CPT_MAX_VF_NUM) { + dev_err(&pdev->dev, "No space to a add new device\n"); + ret = -ENOSPC; + goto err; + } + ae_devices.desc[count].pf_type = pf_type; + ae_devices.desc[count].num_queues = num_queues; + ae_devices.desc[count++].dev = pdev; + atomic_inc(&ae_devices.count); + sort(ae_devices.desc, count, sizeof(struct cpt_device_desc), + compare_func, swap_func); + break; + + default: + dev_err(&pdev->dev, "Unknown VF type %d\n", engine_type); + ret = BAD_OTX_CPTVF_TYPE; + } +err: + mutex_unlock(&mutex); + return ret; +} + +void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod, + enum otx_cptvf_type engine_type) +{ + struct cpt_device_table *dev_tbl; + bool dev_found = false; + int i, j, count; + + mutex_lock(&mutex); + + dev_tbl = (engine_type == OTX_CPT_AE_TYPES) ? &ae_devices : &se_devices; + count = atomic_read(&dev_tbl->count); + for (i = 0; i < count; i++) + if (pdev == dev_tbl->desc[i].dev) { + for (j = i; j < count-1; j++) + dev_tbl->desc[j] = dev_tbl->desc[j+1]; + dev_found = true; + break; + } + + if (!dev_found) { + dev_err(&pdev->dev, "%s device not found\n", __func__); + goto exit; + } + + if (engine_type != OTX_CPT_AE_TYPES) { + if (atomic_dec_and_test(&se_devices.count) && + !is_any_alg_used()) { + cpt_unregister_algs(); + module_put(mod); + is_crypto_registered = false; + } + } else + atomic_dec(&ae_devices.count); +exit: + mutex_unlock(&mutex); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h new file mode 100644 index 0000000000..4181b5c5c3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.h @@ -0,0 +1,190 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPT_ALGS_H +#define __OTX_CPT_ALGS_H + +#include <crypto/hash.h> +#include "otx_cpt_common.h" + +#define OTX_CPT_MAX_ENC_KEY_SIZE 32 +#define OTX_CPT_MAX_HASH_KEY_SIZE 64 +#define OTX_CPT_MAX_KEY_SIZE (OTX_CPT_MAX_ENC_KEY_SIZE + \ + OTX_CPT_MAX_HASH_KEY_SIZE) +enum otx_cpt_request_type { + OTX_CPT_ENC_DEC_REQ = 0x1, + OTX_CPT_AEAD_ENC_DEC_REQ = 0x2, + OTX_CPT_AEAD_ENC_DEC_NULL_REQ = 0x3, + OTX_CPT_PASSTHROUGH_REQ = 0x4 +}; + +enum otx_cpt_major_opcodes { + OTX_CPT_MAJOR_OP_MISC = 0x01, + OTX_CPT_MAJOR_OP_FC = 0x33, + OTX_CPT_MAJOR_OP_HMAC = 0x35, +}; + +enum otx_cpt_req_type { + OTX_CPT_AE_CORE_REQ, + OTX_CPT_SE_CORE_REQ +}; + +enum otx_cpt_cipher_type { + OTX_CPT_CIPHER_NULL = 0x0, + OTX_CPT_DES3_CBC = 0x1, + OTX_CPT_DES3_ECB = 0x2, + OTX_CPT_AES_CBC = 0x3, + OTX_CPT_AES_ECB = 0x4, + OTX_CPT_AES_CFB = 0x5, + OTX_CPT_AES_CTR = 0x6, + OTX_CPT_AES_GCM = 0x7, + OTX_CPT_AES_XTS = 0x8 +}; + +enum otx_cpt_mac_type { + OTX_CPT_MAC_NULL = 0x0, + OTX_CPT_MD5 = 0x1, + OTX_CPT_SHA1 = 0x2, + OTX_CPT_SHA224 = 0x3, + OTX_CPT_SHA256 = 0x4, + OTX_CPT_SHA384 = 0x5, + OTX_CPT_SHA512 = 0x6, + OTX_CPT_GMAC = 0x7 +}; + +enum otx_cpt_aes_key_len { + OTX_CPT_AES_128_BIT = 0x1, + OTX_CPT_AES_192_BIT = 0x2, + OTX_CPT_AES_256_BIT = 0x3 +}; + +union otx_cpt_encr_ctrl { + __be64 flags; + u64 cflags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved1:1; + u64 aes_key:2; + u64 iv_source:1; + u64 mac_type:4; + u64 reserved2:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved3:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved3:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved2:3; + u64 mac_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved1:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct otx_cpt_cipher { + const char *name; + u8 value; +}; + +struct otx_cpt_enc_context { + union otx_cpt_encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +union otx_cpt_fchmac_ctx { + struct { + u8 ipad[64]; + u8 opad[64]; + } e; + struct { + u8 hmac_calc[64]; /* HMAC calculated */ + u8 hmac_recv[64]; /* HMAC received */ + } s; +}; + +struct otx_cpt_fc_ctx { + struct otx_cpt_enc_context enc; + union otx_cpt_fchmac_ctx hmac; +}; + +struct otx_cpt_enc_ctx { + u32 key_len; + u8 enc_key[OTX_CPT_MAX_KEY_SIZE]; + u8 cipher_type; + u8 key_type; +}; + +struct otx_cpt_des3_ctx { + u32 key_len; + u8 des3_key[OTX_CPT_MAX_KEY_SIZE]; +}; + +union otx_cpt_offset_ctrl_word { + __be64 flags; + u64 cflags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved:32; + u64 enc_data_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 enc_data_offset:16; + u64 reserved:32; +#endif + } e; +}; + +struct otx_cpt_req_ctx { + struct otx_cpt_req_info cpt_req; + union otx_cpt_offset_ctrl_word ctrl_word; + struct otx_cpt_fc_ctx fctx; +}; + +struct otx_cpt_sdesc { + struct shash_desc shash; +}; + +struct otx_cpt_aead_ctx { + u8 key[OTX_CPT_MAX_KEY_SIZE]; + struct crypto_shash *hashalg; + struct otx_cpt_sdesc *sdesc; + u8 *ipad; + u8 *opad; + u32 enc_key_len; + u32 auth_key_len; + u8 cipher_type; + u8 mac_type; + u8 key_type; + u8 is_trunc_hmac; +}; +int otx_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + enum otx_cptpf_type pf_type, + enum otx_cptvf_type engine_type, + int num_queues, int num_devices); +void otx_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod, + enum otx_cptvf_type engine_type); +void otx_cpt_callback(int status, void *arg, void *req); + +#endif /* __OTX_CPT_ALGS_H */ diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_main.c b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c new file mode 100644 index 0000000000..88a41d1ca5 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_main.c @@ -0,0 +1,976 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/interrupt.h> +#include <linux/module.h> +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" +#include "otx_cptvf_reqmgr.h" + +#define DRV_NAME "octeontx-cptvf" +#define DRV_VERSION "1.0" + +static void vq_work_handler(unsigned long data) +{ + struct otx_cptvf_wqe_info *cwqe_info = + (struct otx_cptvf_wqe_info *) data; + + otx_cpt_post_process(&cwqe_info->vq_wqe[0]); +} + +static int init_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = kzalloc(sizeof(*cwqe_info), GFP_KERNEL); + if (!cwqe_info) + return -ENOMEM; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Creating VQ worker threads (%d)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) { + tasklet_init(&cwqe_info->vq_wqe[i].twork, vq_work_handler, + (u64)cwqe_info); + cwqe_info->vq_wqe[i].cptvf = cptvf; + } + cptvf->wqe_info = cwqe_info; + + return 0; +} + +static void cleanup_worker_threads(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + struct otx_cptvf_wqe_info *cwqe_info; + int i; + + cwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + if (!cwqe_info) + return; + + if (cptvf->num_queues) { + dev_dbg(&pdev->dev, "Cleaning VQ worker threads (%u)\n", + cptvf->num_queues); + } + + for (i = 0; i < cptvf->num_queues; i++) + tasklet_kill(&cwqe_info->vq_wqe[i].twork); + + kfree_sensitive(cwqe_info); + cptvf->wqe_info = NULL; +} + +static void free_pending_queues(struct otx_cpt_pending_qinfo *pqinfo) +{ + struct otx_cpt_pending_queue *queue; + int i; + + for_each_pending_queue(pqinfo, queue, i) { + if (!queue->head) + continue; + + /* free single queue */ + kfree_sensitive((queue->head)); + queue->front = 0; + queue->rear = 0; + queue->qlen = 0; + } + pqinfo->num_queues = 0; +} + +static int alloc_pending_queues(struct otx_cpt_pending_qinfo *pqinfo, u32 qlen, + u32 num_queues) +{ + struct otx_cpt_pending_queue *queue = NULL; + int ret; + u32 i; + + pqinfo->num_queues = num_queues; + + for_each_pending_queue(pqinfo, queue, i) { + queue->head = kcalloc(qlen, sizeof(*queue->head), GFP_KERNEL); + if (!queue->head) { + ret = -ENOMEM; + goto pending_qfail; + } + + queue->pending_count = 0; + queue->front = 0; + queue->rear = 0; + queue->qlen = qlen; + + /* init queue spin lock */ + spin_lock_init(&queue->lock); + } + return 0; + +pending_qfail: + free_pending_queues(pqinfo); + + return ret; +} + +static int init_pending_queues(struct otx_cptvf *cptvf, u32 qlen, + u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + if (!num_queues) + return 0; + + ret = alloc_pending_queues(&cptvf->pqinfo, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + return ret; + } + return 0; +} + +static void cleanup_pending_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ pending queue (%u)\n", + cptvf->num_queues); + free_pending_queues(&cptvf->pqinfo); +} + +static void free_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo) +{ + struct otx_cpt_cmd_queue *queue = NULL; + struct otx_cpt_cmd_chunk *chunk = NULL; + struct pci_dev *pdev = cptvf->pdev; + int i; + + /* clean up for each queue */ + for (i = 0; i < cptvf->num_queues; i++) { + queue = &cqinfo->queue[i]; + + while (!list_empty(&cqinfo->queue[i].chead)) { + chunk = list_first_entry(&cqinfo->queue[i].chead, + struct otx_cpt_cmd_chunk, nextchunk); + + dma_free_coherent(&pdev->dev, chunk->size, + chunk->head, + chunk->dma_addr); + chunk->head = NULL; + chunk->dma_addr = 0; + list_del(&chunk->nextchunk); + kfree_sensitive(chunk); + } + queue->num_chunks = 0; + queue->idx = 0; + + } +} + +static int alloc_command_queues(struct otx_cptvf *cptvf, + struct otx_cpt_cmd_qinfo *cqinfo, + u32 qlen) +{ + struct otx_cpt_cmd_chunk *curr, *first, *last; + struct otx_cpt_cmd_queue *queue = NULL; + struct pci_dev *pdev = cptvf->pdev; + size_t q_size, c_size, rem_q_size; + u32 qcsize_bytes; + int i; + + + /* Qsize in dwords, needed for SADDR config, 1-next chunk pointer */ + cptvf->qsize = min(qlen, cqinfo->qchunksize) * + OTX_CPT_NEXT_CHUNK_PTR_SIZE + 1; + /* Qsize in bytes to create space for alignment */ + q_size = qlen * OTX_CPT_INST_SIZE; + + qcsize_bytes = cqinfo->qchunksize * OTX_CPT_INST_SIZE; + + /* per queue initialization */ + for (i = 0; i < cptvf->num_queues; i++) { + rem_q_size = q_size; + first = NULL; + last = NULL; + + queue = &cqinfo->queue[i]; + INIT_LIST_HEAD(&queue->chead); + do { + curr = kzalloc(sizeof(*curr), GFP_KERNEL); + if (!curr) + goto cmd_qfail; + + c_size = (rem_q_size > qcsize_bytes) ? qcsize_bytes : + rem_q_size; + curr->head = dma_alloc_coherent(&pdev->dev, + c_size + OTX_CPT_NEXT_CHUNK_PTR_SIZE, + &curr->dma_addr, GFP_KERNEL); + if (!curr->head) { + dev_err(&pdev->dev, + "Command Q (%d) chunk (%d) allocation failed\n", + i, queue->num_chunks); + goto free_curr; + } + curr->size = c_size; + + if (queue->num_chunks == 0) { + first = curr; + queue->base = first; + } + list_add_tail(&curr->nextchunk, + &cqinfo->queue[i].chead); + + queue->num_chunks++; + rem_q_size -= c_size; + if (last) + *((u64 *)(&last->head[last->size])) = + (u64)curr->dma_addr; + + last = curr; + } while (rem_q_size); + + /* + * Make the queue circular, tie back last chunk entry to head + */ + curr = first; + *((u64 *)(&last->head[last->size])) = (u64)curr->dma_addr; + queue->qhead = curr; + } + return 0; +free_curr: + kfree(curr); +cmd_qfail: + free_command_queues(cptvf, cqinfo); + return -ENOMEM; +} + +static int init_command_queues(struct otx_cptvf *cptvf, u32 qlen) +{ + struct pci_dev *pdev = cptvf->pdev; + int ret; + + /* setup command queues */ + ret = alloc_command_queues(cptvf, &cptvf->cqinfo, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to allocate command queues (%u)\n", + cptvf->num_queues); + return ret; + } + return ret; +} + +static void cleanup_command_queues(struct otx_cptvf *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + + if (!cptvf->num_queues) + return; + + dev_dbg(&pdev->dev, "Cleaning VQ command queue (%u)\n", + cptvf->num_queues); + free_command_queues(cptvf, &cptvf->cqinfo); +} + +static void cptvf_sw_cleanup(struct otx_cptvf *cptvf) +{ + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + cleanup_command_queues(cptvf); +} + +static int cptvf_sw_init(struct otx_cptvf *cptvf, u32 qlen, u32 num_queues) +{ + struct pci_dev *pdev = cptvf->pdev; + u32 max_dev_queues = 0; + int ret; + + max_dev_queues = OTX_CPT_NUM_QS_PER_VF; + /* possible cpus */ + num_queues = min_t(u32, num_queues, max_dev_queues); + cptvf->num_queues = num_queues; + + ret = init_command_queues(cptvf, qlen); + if (ret) { + dev_err(&pdev->dev, "Failed to setup command queues (%u)\n", + num_queues); + return ret; + } + + ret = init_pending_queues(cptvf, qlen, num_queues); + if (ret) { + dev_err(&pdev->dev, "Failed to setup pending queues (%u)\n", + num_queues); + goto setup_pqfail; + } + + /* Create worker threads for BH processing */ + ret = init_worker_threads(cptvf); + if (ret) { + dev_err(&pdev->dev, "Failed to setup worker threads\n"); + goto init_work_fail; + } + return 0; + +init_work_fail: + cleanup_worker_threads(cptvf); + cleanup_pending_queues(cptvf); + +setup_pqfail: + cleanup_command_queues(cptvf); + + return ret; +} + +static void cptvf_free_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + irq_set_affinity_hint(pci_irq_vector(cptvf->pdev, vec), NULL); + free_cpumask_var(cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_ctl(struct otx_cptvf *cptvf, bool val) +{ + union otx_cptx_vqx_ctl vqx_ctl; + + vqx_ctl.u = readq(cptvf->reg_base + OTX_CPT_VQX_CTL(0)); + vqx_ctl.s.ena = val; + writeq(vqx_ctl.u, cptvf->reg_base + OTX_CPT_VQX_CTL(0)); +} + +void otx_cptvf_write_vq_doorbell(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_doorbell vqx_dbell; + + vqx_dbell.u = readq(cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); + vqx_dbell.s.dbell_cnt = val * 8; /* Num of Instructions * 8 words */ + writeq(vqx_dbell.u, cptvf->reg_base + OTX_CPT_VQX_DOORBELL(0)); +} + +static void cptvf_write_vq_inprog(struct otx_cptvf *cptvf, u8 val) +{ + union otx_cptx_vqx_inprog vqx_inprg; + + vqx_inprg.u = readq(cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); + vqx_inprg.s.inflight = val; + writeq(vqx_inprg.u, cptvf->reg_base + OTX_CPT_VQX_INPROG(0)); +} + +static void cptvf_write_vq_done_numwait(struct otx_cptvf *cptvf, u32 val) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.num_wait = val; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + +static u32 cptvf_read_vq_done_numwait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.num_wait; +} + +static void cptvf_write_vq_done_timewait(struct otx_cptvf *cptvf, u16 time) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + vqx_dwait.s.time_wait = time; + writeq(vqx_dwait.u, cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); +} + + +static u16 cptvf_read_vq_done_timewait(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_wait vqx_dwait; + + vqx_dwait.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_WAIT(0)); + return vqx_dwait.s.time_wait; +} + +static void cptvf_enable_swerr_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable SWERR interrupts for the requested VF */ + vqx_misc_ena.s.swerr = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_mbox_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_ena_w1s vqx_misc_ena; + + vqx_misc_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); + /* Enable MBOX interrupt for the requested VF */ + vqx_misc_ena.s.mbox = 1; + writeq(vqx_misc_ena.u, cptvf->reg_base + OTX_CPT_VQX_MISC_ENA_W1S(0)); +} + +static void cptvf_enable_done_interrupts(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done_ena_w1s vqx_done_ena; + + vqx_done_ena.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); + /* Enable DONE interrupt for the requested VF */ + vqx_done_ena.s.done = 1; + writeq(vqx_done_ena.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ENA_W1S(0)); +} + +static void cptvf_clear_dovf_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.dovf = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_irde_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.irde = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_nwrp_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.nwrp = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_mbox_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.mbox = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static void cptvf_clear_swerr_intr(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_misc_int vqx_misc_int; + + vqx_misc_int.u = readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); + /* W1C for the VF */ + vqx_misc_int.s.swerr = 1; + writeq(vqx_misc_int.u, cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static u64 cptvf_read_vf_misc_intr_status(struct otx_cptvf *cptvf) +{ + return readq(cptvf->reg_base + OTX_CPT_VQX_MISC_INT(0)); +} + +static irqreturn_t cptvf_misc_intr_handler(int __always_unused irq, + void *arg) +{ + struct otx_cptvf *cptvf = arg; + struct pci_dev *pdev = cptvf->pdev; + u64 intr; + + intr = cptvf_read_vf_misc_intr_status(cptvf); + /* Check for MISC interrupt types */ + if (likely(intr & OTX_CPT_VF_INTR_MBOX_MASK)) { + dev_dbg(&pdev->dev, "Mailbox interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + otx_cptvf_handle_mbox_intr(cptvf); + cptvf_clear_mbox_intr(cptvf); + } else if (unlikely(intr & OTX_CPT_VF_INTR_DOVF_MASK)) { + cptvf_clear_dovf_intr(cptvf); + /* Clear doorbell count */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + dev_err(&pdev->dev, + "Doorbell overflow error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_IRDE_MASK)) { + cptvf_clear_irde_intr(cptvf); + dev_err(&pdev->dev, + "Instruction NCB read error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_NWRP_MASK)) { + cptvf_clear_nwrp_intr(cptvf); + dev_err(&pdev->dev, + "NCB response write error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else if (unlikely(intr & OTX_CPT_VF_INTR_SERR_MASK)) { + cptvf_clear_swerr_intr(cptvf); + dev_err(&pdev->dev, + "Software error interrupt 0x%llx on CPT VF %d\n", + intr, cptvf->vfid); + } else { + dev_err(&pdev->dev, "Unhandled interrupt in OTX_CPT VF %d\n", + cptvf->vfid); + } + + return IRQ_HANDLED; +} + +static inline struct otx_cptvf_wqe *get_cptvf_vq_wqe(struct otx_cptvf *cptvf, + int qno) +{ + struct otx_cptvf_wqe_info *nwqe_info; + + if (unlikely(qno >= cptvf->num_queues)) + return NULL; + nwqe_info = (struct otx_cptvf_wqe_info *)cptvf->wqe_info; + + return &nwqe_info->vq_wqe[qno]; +} + +static inline u32 cptvf_read_vq_done_count(struct otx_cptvf *cptvf) +{ + union otx_cptx_vqx_done vqx_done; + + vqx_done.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE(0)); + return vqx_done.s.done; +} + +static inline void cptvf_write_vq_done_ack(struct otx_cptvf *cptvf, + u32 ackcnt) +{ + union otx_cptx_vqx_done_ack vqx_dack_cnt; + + vqx_dack_cnt.u = readq(cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); + vqx_dack_cnt.s.done_ack = ackcnt; + writeq(vqx_dack_cnt.u, cptvf->reg_base + OTX_CPT_VQX_DONE_ACK(0)); +} + +static irqreturn_t cptvf_done_intr_handler(int __always_unused irq, + void *cptvf_dev) +{ + struct otx_cptvf *cptvf = (struct otx_cptvf *)cptvf_dev; + struct pci_dev *pdev = cptvf->pdev; + /* Read the number of completions */ + u32 intr = cptvf_read_vq_done_count(cptvf); + + if (intr) { + struct otx_cptvf_wqe *wqe; + + /* + * Acknowledge the number of scheduled completions for + * processing + */ + cptvf_write_vq_done_ack(cptvf, intr); + wqe = get_cptvf_vq_wqe(cptvf, 0); + if (unlikely(!wqe)) { + dev_err(&pdev->dev, "No work to schedule for VF (%d)\n", + cptvf->vfid); + return IRQ_NONE; + } + tasklet_hi_schedule(&wqe->twork); + } + + return IRQ_HANDLED; +} + +static void cptvf_set_irq_affinity(struct otx_cptvf *cptvf, int vec) +{ + struct pci_dev *pdev = cptvf->pdev; + int cpu; + + if (!zalloc_cpumask_var(&cptvf->affinity_mask[vec], + GFP_KERNEL)) { + dev_err(&pdev->dev, + "Allocation failed for affinity_mask for VF %d\n", + cptvf->vfid); + return; + } + + cpu = cptvf->vfid % num_online_cpus(); + cpumask_set_cpu(cpumask_local_spread(cpu, cptvf->node), + cptvf->affinity_mask[vec]); + irq_set_affinity_hint(pci_irq_vector(pdev, vec), + cptvf->affinity_mask[vec]); +} + +static void cptvf_write_vq_saddr(struct otx_cptvf *cptvf, u64 val) +{ + union otx_cptx_vqx_saddr vqx_saddr; + + vqx_saddr.u = val; + writeq(vqx_saddr.u, cptvf->reg_base + OTX_CPT_VQX_SADDR(0)); +} + +static void cptvf_device_init(struct otx_cptvf *cptvf) +{ + u64 base_addr = 0; + + /* Disable the VQ */ + cptvf_write_vq_ctl(cptvf, 0); + /* Reset the doorbell */ + otx_cptvf_write_vq_doorbell(cptvf, 0); + /* Clear inflight */ + cptvf_write_vq_inprog(cptvf, 0); + /* Write VQ SADDR */ + base_addr = (u64)(cptvf->cqinfo.queue[0].qhead->dma_addr); + cptvf_write_vq_saddr(cptvf, base_addr); + /* Configure timerhold / coalescence */ + cptvf_write_vq_done_timewait(cptvf, OTX_CPT_TIMER_HOLD); + cptvf_write_vq_done_numwait(cptvf, OTX_CPT_COUNT_HOLD); + /* Enable the VQ */ + cptvf_write_vq_ctl(cptvf, 1); + /* Flag the VF ready */ + cptvf->flags |= OTX_CPT_FLAG_DEVICE_READY; +} + +static ssize_t vf_type_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + char *msg; + + switch (cptvf->vftype) { + case OTX_CPT_AE_TYPES: + msg = "AE"; + break; + + case OTX_CPT_SE_TYPES: + msg = "SE"; + break; + + default: + msg = "Invalid"; + } + + return sysfs_emit(buf, "%s\n", msg); +} + +static ssize_t vf_engine_group_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", cptvf->vfgrp); +} + +static ssize_t vf_engine_group_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + int val, ret; + + ret = kstrtoint(buf, 10, &val); + if (ret) + return ret; + + if (val < 0) + return -EINVAL; + + if (val >= OTX_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Engine group >= than max available groups %d\n", + OTX_CPT_MAX_ENGINE_GROUPS); + return -EINVAL; + } + + ret = otx_cptvf_send_vf_to_grp_msg(cptvf, val); + if (ret) + return ret; + + return count; +} + +static ssize_t vf_coalesc_time_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + cptvf_read_vq_done_timewait(cptvf)); +} + +static ssize_t vf_coalesc_num_wait_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%d\n", + cptvf_read_vq_done_numwait(cptvf)); +} + +static ssize_t vf_coalesc_time_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_TIME_WAIT || + val > OTX_CPT_COALESC_MAX_TIME_WAIT) + return -EINVAL; + + cptvf_write_vq_done_timewait(cptvf, val); + return count; +} + +static ssize_t vf_coalesc_num_wait_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx_cptvf *cptvf = dev_get_drvdata(dev); + long val; + int ret; + + ret = kstrtol(buf, 10, &val); + if (ret != 0) + return ret; + + if (val < OTX_CPT_COALESC_MIN_NUM_WAIT || + val > OTX_CPT_COALESC_MAX_NUM_WAIT) + return -EINVAL; + + cptvf_write_vq_done_numwait(cptvf, val); + return count; +} + +static DEVICE_ATTR_RO(vf_type); +static DEVICE_ATTR_RW(vf_engine_group); +static DEVICE_ATTR_RW(vf_coalesc_time_wait); +static DEVICE_ATTR_RW(vf_coalesc_num_wait); + +static struct attribute *otx_cptvf_attrs[] = { + &dev_attr_vf_type.attr, + &dev_attr_vf_engine_group.attr, + &dev_attr_vf_coalesc_time_wait.attr, + &dev_attr_vf_coalesc_num_wait.attr, + NULL +}; + +static const struct attribute_group otx_cptvf_sysfs_group = { + .attrs = otx_cptvf_attrs, +}; + +static int otx_cptvf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct otx_cptvf *cptvf; + int err; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto disable_device; + } + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable 48-bit DMA configuration\n"); + goto release_regions; + } + + /* MAP PF's configuration registers */ + cptvf->reg_base = pci_iomap(pdev, OTX_CPT_VF_PCI_CFG_BAR, 0); + if (!cptvf->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto release_regions; + } + + cptvf->node = dev_to_node(&pdev->dev); + err = pci_alloc_irq_vectors(pdev, OTX_CPT_VF_MSIX_VECTORS, + OTX_CPT_VF_MSIX_VECTORS, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for #%d msix vectors failed\n", + OTX_CPT_VF_MSIX_VECTORS); + goto unmap_region; + } + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), + cptvf_misc_intr_handler, 0, "CPT VF misc intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request misc irq\n"); + goto free_vectors; + } + + /* Enable mailbox interrupt */ + cptvf_enable_mbox_interrupts(cptvf); + cptvf_enable_swerr_interrupts(cptvf); + + /* Check cpt pf status, gets chip ID / device Id from PF if ready */ + err = otx_cptvf_check_pf_ready(cptvf); + if (err) + goto free_misc_irq; + + /* CPT VF software resources initialization */ + cptvf->cqinfo.qchunksize = OTX_CPT_CMD_QCHUNK_SIZE; + err = cptvf_sw_init(cptvf, OTX_CPT_CMD_QLEN, OTX_CPT_NUM_QS_PER_VF); + if (err) { + dev_err(dev, "cptvf_sw_init() failed\n"); + goto free_misc_irq; + } + /* Convey VQ LEN to PF */ + err = otx_cptvf_send_vq_size_msg(cptvf); + if (err) + goto sw_cleanup; + + /* CPT VF device initialization */ + cptvf_device_init(cptvf); + /* Send msg to PF to assign currnet Q to required group */ + err = otx_cptvf_send_vf_to_grp_msg(cptvf, cptvf->vfgrp); + if (err) + goto sw_cleanup; + + cptvf->priority = 1; + err = otx_cptvf_send_vf_priority_msg(cptvf); + if (err) + goto sw_cleanup; + + err = request_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), + cptvf_done_intr_handler, 0, "CPT VF done intr", + cptvf); + if (err) { + dev_err(dev, "Failed to request done irq\n"); + goto free_done_irq; + } + + /* Enable done interrupt */ + cptvf_enable_done_interrupts(cptvf); + + /* Set irq affinity masks */ + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + cptvf_set_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + + err = otx_cptvf_send_vf_up(cptvf); + if (err) + goto free_irq_affinity; + + /* Initialize algorithms and set ops */ + err = otx_cpt_crypto_init(pdev, THIS_MODULE, + cptvf->vftype == OTX_CPT_SE_TYPES ? OTX_CPT_SE : OTX_CPT_AE, + cptvf->vftype, 1, cptvf->num_vfs); + if (err) { + dev_err(dev, "Failed to register crypto algs\n"); + goto free_irq_affinity; + } + + err = sysfs_create_group(&dev->kobj, &otx_cptvf_sysfs_group); + if (err) { + dev_err(dev, "Creating sysfs entries failed\n"); + goto crypto_exit; + } + + return 0; + +crypto_exit: + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); +free_irq_affinity: + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); +free_done_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); +sw_cleanup: + cptvf_sw_cleanup(cptvf); +free_misc_irq: + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); +free_vectors: + pci_free_irq_vectors(cptvf->pdev); +unmap_region: + pci_iounmap(pdev, cptvf->reg_base); +release_regions: + pci_release_regions(pdev); +disable_device: + pci_disable_device(pdev); +clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return err; +} + +static void otx_cptvf_remove(struct pci_dev *pdev) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT-VF device\n"); + return; + } + + /* Convey DOWN to PF */ + if (otx_cptvf_send_vf_down(cptvf)) { + dev_err(&pdev->dev, "PF not responding to DOWN msg\n"); + } else { + sysfs_remove_group(&pdev->dev.kobj, &otx_cptvf_sysfs_group); + otx_cpt_crypto_exit(pdev, THIS_MODULE, cptvf->vftype); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_DONE); + cptvf_free_irq_affinity(cptvf, CPT_VF_INT_VEC_E_MISC); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_DONE), cptvf); + free_irq(pci_irq_vector(pdev, CPT_VF_INT_VEC_E_MISC), cptvf); + cptvf_sw_cleanup(cptvf); + pci_free_irq_vectors(cptvf->pdev); + pci_iounmap(pdev, cptvf->reg_base); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + } +} + +/* Supported devices */ +static const struct pci_device_id otx_cptvf_id_table[] = { + {PCI_VDEVICE(CAVIUM, OTX_CPT_PCI_VF_DEVICE_ID), 0}, + { 0, } /* end of table */ +}; + +static struct pci_driver otx_cptvf_pci_driver = { + .name = DRV_NAME, + .id_table = otx_cptvf_id_table, + .probe = otx_cptvf_probe, + .remove = otx_cptvf_remove, +}; + +module_pci_driver(otx_cptvf_pci_driver); + +MODULE_AUTHOR("Marvell International Ltd."); +MODULE_DESCRIPTION("Marvell OcteonTX CPT Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, otx_cptvf_id_table); diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c new file mode 100644 index 0000000000..90fdafb7c4 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_mbox.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/delay.h> +#include "otx_cptvf.h" + +#define CPT_MBOX_MSG_TIMEOUT 2000 + +static char *get_mbox_opcode_str(int msg_opcode) +{ + char *str = "Unknown"; + + switch (msg_opcode) { + case OTX_CPT_MSG_VF_UP: + str = "UP"; + break; + + case OTX_CPT_MSG_VF_DOWN: + str = "DOWN"; + break; + + case OTX_CPT_MSG_READY: + str = "READY"; + break; + + case OTX_CPT_MSG_QLEN: + str = "QLEN"; + break; + + case OTX_CPT_MSG_QBIND_GRP: + str = "QBIND_GRP"; + break; + + case OTX_CPT_MSG_VQ_PRIORITY: + str = "VQ_PRIORITY"; + break; + + case OTX_CPT_MSG_PF_TYPE: + str = "PF_TYPE"; + break; + + case OTX_CPT_MSG_ACK: + str = "ACK"; + break; + + case OTX_CPT_MSG_NACK: + str = "NACK"; + break; + } + return str; +} + +static void dump_mbox_msg(struct otx_cpt_mbox *mbox_msg, int vf_id) +{ + char raw_data_str[OTX_CPT_MAX_MBOX_DATA_STR_SIZE]; + + hex_dump_to_buffer(mbox_msg, sizeof(struct otx_cpt_mbox), 16, 8, + raw_data_str, OTX_CPT_MAX_MBOX_DATA_STR_SIZE, false); + if (vf_id >= 0) + pr_debug("MBOX msg %s received from VF%d raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), vf_id, + raw_data_str); + else + pr_debug("MBOX msg %s received from PF raw_data %s", + get_mbox_opcode_str(mbox_msg->msg), raw_data_str); +} + +static void cptvf_send_msg_to_pf(struct otx_cptvf *cptvf, + struct otx_cpt_mbox *mbx) +{ + /* Writing mbox(1) causes interrupt */ + writeq(mbx->msg, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0)); + writeq(mbx->data, cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1)); +} + +/* Interrupt handler to handle mailbox messages from VFs */ +void otx_cptvf_handle_mbox_intr(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + /* + * MBOX[0] contains msg + * MBOX[1] contains data + */ + mbx.msg = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 0)); + mbx.data = readq(cptvf->reg_base + OTX_CPT_VFX_PF_MBOXX(0, 1)); + + dump_mbox_msg(&mbx, -1); + + switch (mbx.msg) { + case OTX_CPT_MSG_VF_UP: + cptvf->pf_acked = true; + cptvf->num_vfs = mbx.data; + break; + case OTX_CPT_MSG_READY: + cptvf->pf_acked = true; + cptvf->vfid = mbx.data; + dev_dbg(&cptvf->pdev->dev, "Received VFID %d\n", cptvf->vfid); + break; + case OTX_CPT_MSG_QBIND_GRP: + cptvf->pf_acked = true; + cptvf->vftype = mbx.data; + dev_dbg(&cptvf->pdev->dev, "VF %d type %s group %d\n", + cptvf->vfid, + ((mbx.data == OTX_CPT_SE_TYPES) ? "SE" : "AE"), + cptvf->vfgrp); + break; + case OTX_CPT_MSG_ACK: + cptvf->pf_acked = true; + break; + case OTX_CPT_MSG_NACK: + cptvf->pf_nacked = true; + break; + default: + dev_err(&cptvf->pdev->dev, "Invalid msg from PF, msg 0x%llx\n", + mbx.msg); + break; + } +} + +static int cptvf_send_msg_to_pf_timeout(struct otx_cptvf *cptvf, + struct otx_cpt_mbox *mbx) +{ + int timeout = CPT_MBOX_MSG_TIMEOUT; + int sleep = 10; + + cptvf->pf_acked = false; + cptvf->pf_nacked = false; + cptvf_send_msg_to_pf(cptvf, mbx); + /* Wait for previous message to be acked, timeout 2sec */ + while (!cptvf->pf_acked) { + if (cptvf->pf_nacked) + return -EINVAL; + msleep(sleep); + if (cptvf->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + dev_err(&cptvf->pdev->dev, + "PF didn't ack to mbox msg %llx from VF%u\n", + mbx->msg, cptvf->vfid); + return -EBUSY; + } + } + return 0; +} + +/* + * Checks if VF is able to comminicate with PF + * and also gets the CPT number this VF is associated to. + */ +int otx_cptvf_check_pf_ready(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + mbx.msg = OTX_CPT_MSG_READY; + + return cptvf_send_msg_to_pf_timeout(cptvf, &mbx); +} + +/* + * Communicate VQs size to PF to program CPT(0)_PF_Q(0-15)_CTL of the VF. + * Must be ACKed. + */ +int otx_cptvf_send_vq_size_msg(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + mbx.msg = OTX_CPT_MSG_QLEN; + mbx.data = cptvf->qsize; + + return cptvf_send_msg_to_pf_timeout(cptvf, &mbx); +} + +/* + * Communicate VF group required to PF and get the VQ binded to that group + */ +int otx_cptvf_send_vf_to_grp_msg(struct otx_cptvf *cptvf, int group) +{ + struct otx_cpt_mbox mbx = {}; + int ret; + + mbx.msg = OTX_CPT_MSG_QBIND_GRP; + /* Convey group of the VF */ + mbx.data = group; + ret = cptvf_send_msg_to_pf_timeout(cptvf, &mbx); + if (ret) + return ret; + cptvf->vfgrp = group; + + return 0; +} + +/* + * Communicate VF group required to PF and get the VQ binded to that group + */ +int otx_cptvf_send_vf_priority_msg(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + mbx.msg = OTX_CPT_MSG_VQ_PRIORITY; + /* Convey group of the VF */ + mbx.data = cptvf->priority; + + return cptvf_send_msg_to_pf_timeout(cptvf, &mbx); +} + +/* + * Communicate to PF that VF is UP and running + */ +int otx_cptvf_send_vf_up(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + mbx.msg = OTX_CPT_MSG_VF_UP; + + return cptvf_send_msg_to_pf_timeout(cptvf, &mbx); +} + +/* + * Communicate to PF that VF is DOWN and running + */ +int otx_cptvf_send_vf_down(struct otx_cptvf *cptvf) +{ + struct otx_cpt_mbox mbx = {}; + + mbx.msg = OTX_CPT_MSG_VF_DOWN; + + return cptvf_send_msg_to_pf_timeout(cptvf, &mbx); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c new file mode 100644 index 0000000000..c80baf1ad9 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.c @@ -0,0 +1,609 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include "otx_cptvf.h" +#include "otx_cptvf_algs.h" + +/* Completion code size and initial value */ +#define COMPLETION_CODE_SIZE 8 +#define COMPLETION_CODE_INIT 0 + +/* SG list header size in bytes */ +#define SG_LIST_HDR_SIZE 8 + +/* Default timeout when waiting for free pending entry in us */ +#define CPT_PENTRY_TIMEOUT 1000 +#define CPT_PENTRY_STEP 50 + +/* Default threshold for stopping and resuming sender requests */ +#define CPT_IQ_STOP_MARGIN 128 +#define CPT_IQ_RESUME_MARGIN 512 + +#define CPT_DMA_ALIGN 128 + +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req) +{ + int i; + + pr_debug("Gather list size %d\n", req->incnt); + for (i = 0; i < req->incnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->in[i].size, req->in[i].vptr, + (void *) req->in[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", + req->in[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->in[i].vptr, req->in[i].size, false); + } + + pr_debug("Scatter list size %d\n", req->outcnt); + for (i = 0; i < req->outcnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->out[i].size, req->out[i].vptr, + (void *) req->out[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->out[i].vptr, req->out[i].size, false); + } +} + +static inline struct otx_cpt_pending_entry *get_free_pending_entry( + struct otx_cpt_pending_queue *q, + int qlen) +{ + struct otx_cpt_pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) + return NULL; + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + + return ent; +} + +static inline u32 modulo_inc(u32 index, u32 length, u32 inc) +{ + if (WARN_ON(inc > length)) + inc = length; + + index += inc; + if (unlikely(index >= length)) + index -= length; + + return index; +} + +static inline void free_pentry(struct otx_cpt_pending_entry *pentry) +{ + pentry->completion_addr = NULL; + pentry->info = NULL; + pentry->callback = NULL; + pentry->areq = NULL; + pentry->resume_sender = false; + pentry->busy = false; +} + +static inline int setup_sgio_components(struct pci_dev *pdev, + struct otx_cpt_buf_ptr *list, + int buf_count, u8 *buffer) +{ + struct otx_cpt_sglist_component *sg_ptr = NULL; + int ret = 0, i, j; + int components; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input list pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (likely(list[i].vptr)) { + list[i].dma_addr = dma_map_single(&pdev->dev, + list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, + list[i].dma_addr))) { + dev_err(&pdev->dev, "Dma mapping failed\n"); + ret = -EIO; + goto sg_cleanup; + } + } + } + + components = buf_count / 4; + sg_ptr = (struct otx_cpt_sglist_component *)buffer; + for (i = 0; i < components; i++) { + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->u.s.len3 = cpu_to_be16(list[i * 4 + 3].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); + sg_ptr++; + } + components = buf_count % 4; + + switch (components) { + case 3: + sg_ptr->u.s.len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + fallthrough; + case 2: + sg_ptr->u.s.len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + fallthrough; + case 1: + sg_ptr->u.s.len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + break; + default: + break; + } + return ret; + +sg_cleanup: + for (j = 0; j < i; j++) { + if (list[j].dma_addr) { + dma_unmap_single(&pdev->dev, list[i].dma_addr, + list[i].size, DMA_BIDIRECTIONAL); + } + + list[j].dma_addr = 0; + } + return ret; +} + +static inline int setup_sgio_list(struct pci_dev *pdev, + struct otx_cpt_info_buffer **pinfo, + struct otx_cpt_req_info *req, gfp_t gfp) +{ + u32 dlen, align_dlen, info_len, rlen; + struct otx_cpt_info_buffer *info; + u16 g_sz_bytes, s_sz_bytes; + int align = CPT_DMA_ALIGN; + u32 total_mem_len; + + if (unlikely(req->incnt > OTX_CPT_MAX_SG_IN_CNT || + req->outcnt > OTX_CPT_MAX_SG_OUT_CNT)) { + dev_err(&pdev->dev, "Error too many sg components\n"); + return -EINVAL; + } + + g_sz_bytes = ((req->incnt + 3) / 4) * + sizeof(struct otx_cpt_sglist_component); + s_sz_bytes = ((req->outcnt + 3) / 4) * + sizeof(struct otx_cpt_sglist_component); + + dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; + align_dlen = ALIGN(dlen, align); + info_len = ALIGN(sizeof(*info), align); + rlen = ALIGN(sizeof(union otx_cpt_res_s), align); + total_mem_len = align_dlen + info_len + rlen + COMPLETION_CODE_SIZE; + + info = kzalloc(total_mem_len, gfp); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Memory allocation failed\n"); + return -ENOMEM; + } + *pinfo = info; + info->dlen = dlen; + info->in_buffer = (u8 *)info + info_len; + + ((__be16 *)info->in_buffer)[0] = cpu_to_be16(req->outcnt); + ((__be16 *)info->in_buffer)[1] = cpu_to_be16(req->incnt); + ((u16 *)info->in_buffer)[2] = 0; + ((u16 *)info->in_buffer)[3] = 0; + + /* Setup gather (input) components */ + if (setup_sgio_components(pdev, req->in, req->incnt, + &info->in_buffer[8])) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + return -EFAULT; + } + + if (setup_sgio_components(pdev, req->out, req->outcnt, + &info->in_buffer[8 + g_sz_bytes])) { + dev_err(&pdev->dev, "Failed to setup scatter list\n"); + return -EFAULT; + } + + info->dma_len = total_mem_len - info_len; + info->dptr_baddr = dma_map_single(&pdev->dev, (void *)info->in_buffer, + info->dma_len, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { + dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n"); + return -EIO; + } + /* + * Get buffer for union otx_cpt_res_s response + * structure and its physical address + */ + info->completion_addr = (u64 *)(info->in_buffer + align_dlen); + info->comp_baddr = info->dptr_baddr + align_dlen; + + /* Create and initialize RPTR */ + info->out_buffer = (u8 *)info->completion_addr + rlen; + info->rptr_baddr = info->comp_baddr + rlen; + + *((u64 *) info->out_buffer) = ~((u64) COMPLETION_CODE_INIT); + + return 0; +} + + +static void cpt_fill_inst(union otx_cpt_inst_s *inst, + struct otx_cpt_info_buffer *info, + struct otx_cpt_iq_cmd *cmd) +{ + inst->u[0] = 0x0; + inst->s.doneint = true; + inst->s.res_addr = (u64)info->comp_baddr; + inst->u[2] = 0x0; + inst->s.wq_ptr = 0; + inst->s.ei0 = cmd->cmd.u64; + inst->s.ei1 = cmd->dptr; + inst->s.ei2 = cmd->rptr; + inst->s.ei3 = cmd->cptr.u64; +} + +/* + * On OcteonTX platform the parameter db_count is used as a count for ringing + * door bell. The valid values for db_count are: + * 0 - 1 CPT instruction will be enqueued however CPT will not be informed + * 1 - 1 CPT instruction will be enqueued and CPT will be informed + */ +static void cpt_send_cmd(union otx_cpt_inst_s *cptinst, struct otx_cptvf *cptvf) +{ + struct otx_cpt_cmd_qinfo *qinfo = &cptvf->cqinfo; + struct otx_cpt_cmd_queue *queue; + struct otx_cpt_cmd_chunk *curr; + u8 *ent; + + queue = &qinfo->queue[0]; + /* + * cpt_send_cmd is currently called only from critical section + * therefore no locking is required for accessing instruction queue + */ + ent = &queue->qhead->head[queue->idx * OTX_CPT_INST_SIZE]; + memcpy(ent, (void *) cptinst, OTX_CPT_INST_SIZE); + + if (++queue->idx >= queue->qhead->size / 64) { + curr = queue->qhead; + + if (list_is_last(&curr->nextchunk, &queue->chead)) + queue->qhead = queue->base; + else + queue->qhead = list_next_entry(queue->qhead, nextchunk); + queue->idx = 0; + } + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + otx_cptvf_write_vq_doorbell(cptvf, 1); +} + +static int process_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + struct otx_cpt_pending_queue *pqueue, + struct otx_cptvf *cptvf) +{ + struct otx_cptvf_request *cpt_req = &req->req; + struct otx_cpt_pending_entry *pentry = NULL; + union otx_cpt_ctrl_info *ctrl = &req->ctrl; + struct otx_cpt_info_buffer *info = NULL; + union otx_cpt_res_s *result = NULL; + struct otx_cpt_iq_cmd iq_cmd; + union otx_cpt_inst_s cptinst; + int retry, ret = 0; + u8 resume_sender; + gfp_t gfp; + + gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : + GFP_ATOMIC; + ret = setup_sgio_list(pdev, &info, req, gfp); + if (unlikely(ret)) { + dev_err(&pdev->dev, "Setting up SG list failed\n"); + goto request_cleanup; + } + cpt_req->dlen = info->dlen; + + result = (union otx_cpt_res_s *) info->completion_addr; + result->s.compcode = COMPLETION_CODE_INIT; + + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP; + while (unlikely(!pentry) && retry--) { + spin_unlock_bh(&pqueue->lock); + udelay(CPT_PENTRY_STEP); + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + } + + if (unlikely(!pentry)) { + ret = -ENOSPC; + spin_unlock_bh(&pqueue->lock); + goto request_cleanup; + } + + /* + * Check if we are close to filling in entire pending queue, + * if so then tell the sender to stop/sleep by returning -EBUSY + * We do it only for context which can sleep (GFP_KERNEL) + */ + if (gfp == GFP_KERNEL && + pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) { + pentry->resume_sender = true; + } else + pentry->resume_sender = false; + resume_sender = pentry->resume_sender; + pqueue->pending_count++; + + pentry->completion_addr = info->completion_addr; + pentry->info = info; + pentry->callback = req->callback; + pentry->areq = req->areq; + pentry->busy = true; + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Fill in the command */ + iq_cmd.cmd.u64 = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + iq_cmd.dptr = info->dptr_baddr; + iq_cmd.rptr = info->rptr_baddr; + iq_cmd.cptr.u64 = 0; + iq_cmd.cptr.s.grp = ctrl->s.grp; + + /* Fill in the CPT_INST_S type command for HW interpretation */ + cpt_fill_inst(&cptinst, info, &iq_cmd); + + /* Print debug info if enabled */ + otx_cpt_dump_sg_list(pdev, req); + pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX_CPT_INST_SIZE); + print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX_CPT_INST_SIZE, false); + pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen); + print_hex_dump_debug("", 0, 16, 1, info->in_buffer, + cpt_req->dlen, false); + + /* Send CPT command */ + cpt_send_cmd(&cptinst, cptvf); + + /* + * We allocate and prepare pending queue entry in critical section + * together with submitting CPT instruction to CPT instruction queue + * to make sure that order of CPT requests is the same in both + * pending and instruction queues + */ + spin_unlock_bh(&pqueue->lock); + + ret = resume_sender ? -EBUSY : -EINPROGRESS; + return ret; + +request_cleanup: + do_request_cleanup(pdev, info); + return ret; +} + +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num) +{ + struct otx_cptvf *cptvf = pci_get_drvdata(pdev); + + if (!otx_cpt_device_ready(cptvf)) { + dev_err(&pdev->dev, "CPT Device is not ready\n"); + return -ENODEV; + } + + if ((cptvf->vftype == OTX_CPT_SE_TYPES) && (!req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of SE TYPE got AE request\n", + cptvf->vfid); + return -EINVAL; + } else if ((cptvf->vftype == OTX_CPT_AE_TYPES) && + (req->ctrl.s.se_req)) { + dev_err(&pdev->dev, "CPTVF-%d of AE TYPE got SE request\n", + cptvf->vfid); + return -EINVAL; + } + + return process_request(pdev, req, &cptvf->pqinfo.queue[0], cptvf); +} + +static int cpt_process_ccode(struct pci_dev *pdev, + union otx_cpt_res_s *cpt_status, + struct otx_cpt_info_buffer *cpt_info, + struct otx_cpt_req_info *req, u32 *res_code) +{ + u8 ccode = cpt_status->s.compcode; + union otx_cpt_error_code ecode; + + ecode.u = be64_to_cpup((__be64 *)cpt_info->out_buffer); + switch (ccode) { + case CPT_COMP_E_FAULT: + dev_err(&pdev->dev, + "Request failed with DMA fault\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_SWERR: + dev_err(&pdev->dev, + "Request failed with software error code %d\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + + case CPT_COMP_E_HWERR: + dev_err(&pdev->dev, + "Request failed with hardware error\n"); + otx_cpt_dump_sg_list(pdev, req); + break; + + case COMPLETION_CODE_INIT: + /* check for timeout */ + if (time_after_eq(jiffies, cpt_info->time_in + + OTX_CPT_COMMAND_TIMEOUT * HZ)) + dev_warn(&pdev->dev, "Request timed out 0x%p\n", req); + else if (cpt_info->extra_time < OTX_CPT_TIME_IN_RESET_COUNT) { + cpt_info->time_in = jiffies; + cpt_info->extra_time++; + } + return 1; + + case CPT_COMP_E_GOOD: + /* Check microcode completion code */ + if (ecode.s.ccode) { + /* + * If requested hmac is truncated and ucode returns + * s/g write length error then we report success + * because ucode writes as many bytes of calculated + * hmac as available in gather buffer and reports + * s/g write length error if number of bytes in gather + * buffer is less than full hmac size. + */ + if (req->is_trunc_hmac && + ecode.s.ccode == ERR_SCATTER_GATHER_WRITE_LENGTH) { + *res_code = 0; + break; + } + + dev_err(&pdev->dev, + "Request failed with software error code 0x%x\n", + ecode.s.ccode); + otx_cpt_dump_sg_list(pdev, req); + break; + } + + /* Request has been processed with success */ + *res_code = 0; + break; + + default: + dev_err(&pdev->dev, "Request returned invalid status\n"); + break; + } + + return 0; +} + +static inline void process_pending_queue(struct pci_dev *pdev, + struct otx_cpt_pending_queue *pqueue) +{ + void (*callback)(int status, void *arg1, void *arg2); + struct otx_cpt_pending_entry *resume_pentry = NULL; + struct otx_cpt_pending_entry *pentry = NULL; + struct otx_cpt_info_buffer *cpt_info = NULL; + union otx_cpt_res_s *cpt_status = NULL; + struct otx_cpt_req_info *req = NULL; + struct crypto_async_request *areq; + u32 res_code, resume_index; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + + if (WARN_ON(!pentry)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + res_code = -EINVAL; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + if (unlikely(!pentry->callback)) { + dev_err(&pdev->dev, "Callback NULL\n"); + goto process_pentry; + } + + cpt_info = pentry->info; + if (unlikely(!cpt_info)) { + dev_err(&pdev->dev, "Pending entry post arg NULL\n"); + goto process_pentry; + } + + req = cpt_info->req; + if (unlikely(!req)) { + dev_err(&pdev->dev, "Request NULL\n"); + goto process_pentry; + } + + cpt_status = (union otx_cpt_res_s *) pentry->completion_addr; + if (unlikely(!cpt_status)) { + dev_err(&pdev->dev, "Completion address NULL\n"); + goto process_pentry; + } + + if (cpt_process_ccode(pdev, cpt_status, cpt_info, req, + &res_code)) { + spin_unlock_bh(&pqueue->lock); + return; + } + cpt_info->pdev = pdev; + +process_pentry: + /* + * Check if we should inform sending side to resume + * We do it CPT_IQ_RESUME_MARGIN elements in advance before + * pending queue becomes empty + */ + resume_index = modulo_inc(pqueue->front, pqueue->qlen, + CPT_IQ_RESUME_MARGIN); + resume_pentry = &pqueue->head[resume_index]; + if (resume_pentry && + resume_pentry->resume_sender) { + resume_pentry->resume_sender = false; + callback = resume_pentry->callback; + areq = resume_pentry->areq; + + if (callback) { + spin_unlock_bh(&pqueue->lock); + + /* + * EINPROGRESS is an indication for sending + * side that it can resume sending requests + */ + callback(-EINPROGRESS, areq, cpt_info); + spin_lock_bh(&pqueue->lock); + } + } + + callback = pentry->callback; + areq = pentry->areq; + free_pentry(pentry); + + pqueue->pending_count--; + pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1); + spin_unlock_bh(&pqueue->lock); + + /* + * Call callback after current pending entry has been + * processed, we don't do it if the callback pointer is + * invalid. + */ + if (callback) + callback(res_code, areq, cpt_info); + } +} + +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe) +{ + process_pending_queue(wqe->cptvf->pdev, &wqe->cptvf->pqinfo.queue[0]); +} diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h new file mode 100644 index 0000000000..a02d059fb6 --- /dev/null +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_reqmgr.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Marvell OcteonTX CPT driver + * + * Copyright (C) 2019 Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __OTX_CPTVF_REQUEST_MANAGER_H +#define __OTX_CPTVF_REQUEST_MANAGER_H + +#include <linux/types.h> +#include <linux/crypto.h> +#include <linux/pci.h> +#include "otx_cpt_hw_types.h" + +/* + * Maximum total number of SG buffers is 100, we divide it equally + * between input and output + */ +#define OTX_CPT_MAX_SG_IN_CNT 50 +#define OTX_CPT_MAX_SG_OUT_CNT 50 + +/* DMA mode direct or SG */ +#define OTX_CPT_DMA_DIRECT_DIRECT 0 +#define OTX_CPT_DMA_GATHER_SCATTER 1 + +/* Context source CPTR or DPTR */ +#define OTX_CPT_FROM_CPTR 0 +#define OTX_CPT_FROM_DPTR 1 + +/* CPT instruction queue alignment */ +#define OTX_CPT_INST_Q_ALIGNMENT 128 +#define OTX_CPT_MAX_REQ_SIZE 65535 + +/* Default command timeout in seconds */ +#define OTX_CPT_COMMAND_TIMEOUT 4 +#define OTX_CPT_TIMER_HOLD 0x03F +#define OTX_CPT_COUNT_HOLD 32 +#define OTX_CPT_TIME_IN_RESET_COUNT 5 + +/* Minimum and maximum values for interrupt coalescing */ +#define OTX_CPT_COALESC_MIN_TIME_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_TIME_WAIT ((1<<16)-1) +#define OTX_CPT_COALESC_MIN_NUM_WAIT 0x0 +#define OTX_CPT_COALESC_MAX_NUM_WAIT ((1<<20)-1) + +union otx_cpt_opcode_info { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct otx_cptvf_request { + u32 param1; + u32 param2; + u16 dlen; + union otx_cpt_opcode_info opcode; +}; + +struct otx_cpt_buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +union otx_cpt_ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved0:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1; /* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved0:26; +#endif + } s; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union otx_cpt_iq_cmd_word0 { + u64 u64; + struct { + __be16 opcode; + __be16 param1; + __be16 param2; + __be16 dlen; + } s; +}; + +union otx_cpt_iq_cmd_word3 { + u64 u64; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 grp:3; + u64 cptr:61; +#else + u64 cptr:61; + u64 grp:3; +#endif + } s; +}; + +struct otx_cpt_iq_cmd { + union otx_cpt_iq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union otx_cpt_iq_cmd_word3 cptr; +}; + +struct otx_cpt_sglist_component { + union { + u64 len; + struct { + __be16 len0; + __be16 len1; + __be16 len2; + __be16 len3; + } s; + } u; + __be64 ptr0; + __be64 ptr1; + __be64 ptr2; + __be64 ptr3; +}; + +struct otx_cpt_pending_entry { + u64 *completion_addr; /* Completion address */ + struct otx_cpt_info_buffer *info; + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + u8 resume_sender; /* Notify sender to resume sending requests */ + u8 busy; /* Entry status (free/busy) */ +}; + +struct otx_cpt_pending_queue { + struct otx_cpt_pending_entry *head; /* Head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + u32 pending_count; /* Pending requests count */ + u32 qlen; /* Queue length */ + spinlock_t lock; /* Queue lock */ +}; + +struct otx_cpt_req_info { + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + struct otx_cptvf_request req;/* Request information (core specific) */ + union otx_cpt_ctrl_info ctrl;/* User control information */ + struct otx_cpt_buf_ptr in[OTX_CPT_MAX_SG_IN_CNT]; + struct otx_cpt_buf_ptr out[OTX_CPT_MAX_SG_OUT_CNT]; + u8 *iv_out; /* IV to send back */ + u16 rlen; /* Output length */ + u8 incnt; /* Number of input buffers */ + u8 outcnt; /* Number of output buffers */ + u8 req_type; /* Type of request */ + u8 is_enc; /* Is a request an encryption request */ + u8 is_trunc_hmac;/* Is truncated hmac used */ +}; + +struct otx_cpt_info_buffer { + struct otx_cpt_pending_entry *pentry; + struct otx_cpt_req_info *req; + struct pci_dev *pdev; + u64 *completion_addr; + u8 *out_buffer; + u8 *in_buffer; + dma_addr_t dptr_baddr; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + unsigned long time_in; + u32 dlen; + u32 dma_len; + u8 extra_time; +}; + +static inline void do_request_cleanup(struct pci_dev *pdev, + struct otx_cpt_info_buffer *info) +{ + struct otx_cpt_req_info *req; + int i; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dma_len, DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->outcnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->incnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + kfree_sensitive(info); +} + +struct otx_cptvf_wqe; +void otx_cpt_dump_sg_list(struct pci_dev *pdev, struct otx_cpt_req_info *req); +void otx_cpt_post_process(struct otx_cptvf_wqe *wqe); +int otx_cpt_do_request(struct pci_dev *pdev, struct otx_cpt_req_info *req, + int cpu_num); + +#endif /* __OTX_CPTVF_REQUEST_MANAGER_H */ diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile new file mode 100644 index 0000000000..f0f2942c1d --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptcommon.o rvu_cptpf.o rvu_cptvf.o + +rvu_cptcommon-objs := cn10k_cpt.o otx2_cptlf.o otx2_cpt_mbox_common.o +rvu_cptpf-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \ + otx2_cptpf_ucode.o otx2_cpt_devlink.o +rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o \ + otx2_cptvf_reqmgr.o otx2_cptvf_algs.o + +ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c new file mode 100644 index 0000000000..93d22b3289 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Marvell. */ + +#include <linux/soc/marvell/octeontx2/asm.h> +#include "otx2_cptpf.h" +#include "otx2_cptvf.h" +#include "otx2_cptlf.h" +#include "cn10k_cpt.h" + +static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, + struct otx2_cptlf_info *lf); + +static struct cpt_hw_ops otx2_hw_ops = { + .send_cmd = otx2_cpt_send_cmd, + .cpt_get_compcode = otx2_cpt_get_compcode, + .cpt_get_uc_compcode = otx2_cpt_get_uc_compcode, +}; + +static struct cpt_hw_ops cn10k_hw_ops = { + .send_cmd = cn10k_cpt_send_cmd, + .cpt_get_compcode = cn10k_cpt_get_compcode, + .cpt_get_uc_compcode = cn10k_cpt_get_uc_compcode, +}; + +static void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num, + struct otx2_cptlf_info *lf) +{ + void __iomem *lmtline = lf->lmtline; + u64 val = (lf->slot & 0x7FF); + u64 tar_addr = 0; + + /* tar_addr<6:4> = Size of first LMTST - 1 in units of 128b. */ + tar_addr |= (__force u64)lf->ioreg | + (((OTX2_CPT_INST_SIZE/16) - 1) & 0x7) << 4; + /* + * Make sure memory areas pointed in CPT_INST_S + * are flushed before the instruction is sent to CPT + */ + dma_wmb(); + + /* Copy CPT command to LMTLINE */ + memcpy_toio(lmtline, cptinst, insts_num * OTX2_CPT_INST_SIZE); + cn10k_lmt_flush(val, tar_addr); +} + +int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf) +{ + struct pci_dev *pdev = cptpf->pdev; + resource_size_t size; + u64 lmt_base; + + if (!test_bit(CN10K_LMTST, &cptpf->cap_flag)) { + cptpf->lfs.ops = &otx2_hw_ops; + return 0; + } + + cptpf->lfs.ops = &cn10k_hw_ops; + lmt_base = readq(cptpf->reg_base + RVU_PF_LMTLINE_ADDR); + if (!lmt_base) { + dev_err(&pdev->dev, "PF LMTLINE address not configured\n"); + return -ENOMEM; + } + size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); + size -= ((1 + cptpf->max_vfs) * MBOX_SIZE); + cptpf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, lmt_base, size); + if (!cptpf->lfs.lmt_base) { + dev_err(&pdev->dev, + "Mapping of PF LMTLINE address failed\n"); + return -ENOMEM; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cn10k_cptpf_lmtst_init, CRYPTO_DEV_OCTEONTX2_CPT); + +int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + resource_size_t offset, size; + + if (!test_bit(CN10K_LMTST, &cptvf->cap_flag)) { + cptvf->lfs.ops = &otx2_hw_ops; + return 0; + } + + cptvf->lfs.ops = &cn10k_hw_ops; + offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); + size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); + /* Map VF LMILINE region */ + cptvf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, offset, size); + if (!cptvf->lfs.lmt_base) { + dev_err(&pdev->dev, "Unable to map BAR4\n"); + return -ENOMEM; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cn10k_cptvf_lmtst_init, CRYPTO_DEV_OCTEONTX2_CPT); diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h new file mode 100644 index 0000000000..aaefc7e38e --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2021 Marvell. + */ +#ifndef __CN10K_CPT_H +#define __CN10K_CPT_H + +#include "otx2_cpt_common.h" +#include "otx2_cptpf.h" +#include "otx2_cptvf.h" + +static inline u8 cn10k_cpt_get_compcode(union otx2_cpt_res_s *result) +{ + return ((struct cn10k_cpt_res_s *)result)->compcode; +} + +static inline u8 cn10k_cpt_get_uc_compcode(union otx2_cpt_res_s *result) +{ + return ((struct cn10k_cpt_res_s *)result)->uc_compcode; +} + +static inline u8 otx2_cpt_get_compcode(union otx2_cpt_res_s *result) +{ + return ((struct cn9k_cpt_res_s *)result)->compcode; +} + +static inline u8 otx2_cpt_get_uc_compcode(union otx2_cpt_res_s *result) +{ + return ((struct cn9k_cpt_res_s *)result)->uc_compcode; +} + +int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf); +int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf); + +#endif /* __CN10K_CPTLF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h new file mode 100644 index 0000000000..46b778bbbe --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPT_COMMON_H +#define __OTX2_CPT_COMMON_H + +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/module.h> +#include <linux/delay.h> +#include <linux/crypto.h> +#include <net/devlink.h> +#include "otx2_cpt_hw_types.h" +#include "rvu.h" +#include "mbox.h" + +#define OTX2_CPT_MAX_VFS_NUM 128 +#define OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs) \ + (((blk) << 20) | ((slot) << 12) | (offs)) +#define OTX2_CPT_RVU_PFFUNC(pf, func) \ + ((((pf) & RVU_PFVF_PF_MASK) << RVU_PFVF_PF_SHIFT) | \ + (((func) & RVU_PFVF_FUNC_MASK) << RVU_PFVF_FUNC_SHIFT)) + +#define OTX2_CPT_INVALID_CRYPTO_ENG_GRP 0xFF +#define OTX2_CPT_NAME_LENGTH 64 +#define OTX2_CPT_DMA_MINALIGN 128 + +/* HW capability flags */ +#define CN10K_MBOX 0 +#define CN10K_LMTST 1 + +#define BAD_OTX2_CPT_ENG_TYPE OTX2_CPT_MAX_ENG_TYPES + +enum otx2_cpt_eng_type { + OTX2_CPT_AE_TYPES = 1, + OTX2_CPT_SE_TYPES = 2, + OTX2_CPT_IE_TYPES = 3, + OTX2_CPT_MAX_ENG_TYPES, +}; + +/* Take mbox id from end of CPT mbox range in AF (range 0xA00 - 0xBFF) */ +#define MBOX_MSG_RX_INLINE_IPSEC_LF_CFG 0xBFE +#define MBOX_MSG_GET_ENG_GRP_NUM 0xBFF +#define MBOX_MSG_GET_CAPS 0xBFD +#define MBOX_MSG_GET_KVF_LIMITS 0xBFC + +/* + * Message request to config cpt lf for inline inbound ipsec. + * This message is only used between CPT PF <-> CPT VF + */ +struct otx2_cpt_rx_inline_lf_cfg { + struct mbox_msghdr hdr; + u16 sso_pf_func; + u16 param1; + u16 param2; + u16 opcode; + u32 credit; + u32 reserved; +}; + +/* + * Message request and response to get engine group number + * which has attached a given type of engines (SE, AE, IE) + * This messages are only used between CPT PF <=> CPT VF + */ +struct otx2_cpt_egrp_num_msg { + struct mbox_msghdr hdr; + u8 eng_type; +}; + +struct otx2_cpt_egrp_num_rsp { + struct mbox_msghdr hdr; + u8 eng_type; + u8 eng_grp_num; +}; + +/* + * Message request and response to get kernel crypto limits + * This messages are only used between CPT PF <-> CPT VF + */ +struct otx2_cpt_kvf_limits_msg { + struct mbox_msghdr hdr; +}; + +struct otx2_cpt_kvf_limits_rsp { + struct mbox_msghdr hdr; + u8 kvf_limits; +}; + +/* CPT HW capabilities */ +union otx2_cpt_eng_caps { + u64 u; + struct { + u64 reserved_0_4:5; + u64 mul:1; + u64 sha1_sha2:1; + u64 chacha20:1; + u64 zuc_snow3g:1; + u64 sha3:1; + u64 aes:1; + u64 kasumi:1; + u64 des:1; + u64 crc:1; + u64 reserved_14_63:50; + }; +}; + +/* + * Message request and response to get HW capabilities for each + * engine type (SE, IE, AE). + * This messages are only used between CPT PF <=> CPT VF + */ +struct otx2_cpt_caps_msg { + struct mbox_msghdr hdr; +}; + +struct otx2_cpt_caps_rsp { + struct mbox_msghdr hdr; + u16 cpt_pf_drv_version; + u8 cpt_revision; + union otx2_cpt_eng_caps eng_caps[OTX2_CPT_MAX_ENG_TYPES]; +}; + +static inline void otx2_cpt_write64(void __iomem *reg_base, u64 blk, u64 slot, + u64 offs, u64 val) +{ + writeq_relaxed(val, reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs)); +} + +static inline u64 otx2_cpt_read64(void __iomem *reg_base, u64 blk, u64 slot, + u64 offs) +{ + return readq_relaxed(reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs)); +} + +static inline bool is_dev_otx2(struct pci_dev *pdev) +{ + if (pdev->device == OTX2_CPT_PCI_PF_DEVICE_ID || + pdev->device == OTX2_CPT_PCI_VF_DEVICE_ID) + return true; + + return false; +} + +static inline void otx2_cpt_set_hw_caps(struct pci_dev *pdev, + unsigned long *cap_flag) +{ + if (!is_dev_otx2(pdev)) { + __set_bit(CN10K_MBOX, cap_flag); + __set_bit(CN10K_LMTST, cap_flag); + } +} + + +int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); +int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev); + +int otx2_cpt_send_af_reg_requests(struct otx2_mbox *mbox, + struct pci_dev *pdev); +int otx2_cpt_add_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 val, int blkaddr); +int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 *val, int blkaddr); +int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 val, int blkaddr); +struct otx2_cptlfs_info; +int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs); +int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs); +int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs); +int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox); + +#endif /* __OTX2_CPT_COMMON_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c new file mode 100644 index 0000000000..a2aba0b0d6 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Marvell. */ + +#include "otx2_cpt_devlink.h" + +static int otx2_cpt_dl_egrp_create(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl); + struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; + + return otx2_cpt_dl_custom_egrp_create(cptpf, ctx); +} + +static int otx2_cpt_dl_egrp_delete(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl); + struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; + + return otx2_cpt_dl_custom_egrp_delete(cptpf, ctx); +} + +static int otx2_cpt_dl_uc_info(struct devlink *dl, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl); + struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; + + otx2_cpt_print_uc_dbg_info(cptpf); + + return 0; +} + +enum otx2_cpt_dl_param_id { + OTX2_CPT_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + OTX2_CPT_DEVLINK_PARAM_ID_EGRP_CREATE, + OTX2_CPT_DEVLINK_PARAM_ID_EGRP_DELETE, +}; + +static const struct devlink_param otx2_cpt_dl_params[] = { + DEVLINK_PARAM_DRIVER(OTX2_CPT_DEVLINK_PARAM_ID_EGRP_CREATE, + "egrp_create", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + otx2_cpt_dl_uc_info, otx2_cpt_dl_egrp_create, + NULL), + DEVLINK_PARAM_DRIVER(OTX2_CPT_DEVLINK_PARAM_ID_EGRP_DELETE, + "egrp_delete", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_RUNTIME), + otx2_cpt_dl_uc_info, otx2_cpt_dl_egrp_delete, + NULL), +}; + +static int otx2_cpt_dl_info_firmware_version_put(struct devlink_info_req *req, + struct otx2_cpt_eng_grp_info grp[], + const char *ver_name, int eng_type) +{ + struct otx2_cpt_engs_rsvd *eng; + int i; + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + eng = find_engines_by_type(&grp[i], eng_type); + if (eng) + return devlink_info_version_running_put(req, ver_name, + eng->ucode->ver_str); + } + + return 0; +} + +static int otx2_cpt_devlink_info_get(struct devlink *dl, + struct devlink_info_req *req, + struct netlink_ext_ack *extack) +{ + struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl); + struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; + int err; + + err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp, + "fw.ae", OTX2_CPT_AE_TYPES); + if (err) + return err; + + err = otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp, + "fw.se", OTX2_CPT_SE_TYPES); + if (err) + return err; + + return otx2_cpt_dl_info_firmware_version_put(req, cptpf->eng_grps.grp, + "fw.ie", OTX2_CPT_IE_TYPES); +} + +static const struct devlink_ops otx2_cpt_devlink_ops = { + .info_get = otx2_cpt_devlink_info_get, +}; + +int otx2_cpt_register_dl(struct otx2_cptpf_dev *cptpf) +{ + struct device *dev = &cptpf->pdev->dev; + struct otx2_cpt_devlink *cpt_dl; + struct devlink *dl; + int ret; + + dl = devlink_alloc(&otx2_cpt_devlink_ops, + sizeof(struct otx2_cpt_devlink), dev); + if (!dl) { + dev_warn(dev, "devlink_alloc failed\n"); + return -ENOMEM; + } + + cpt_dl = devlink_priv(dl); + cpt_dl->dl = dl; + cpt_dl->cptpf = cptpf; + cptpf->dl = dl; + ret = devlink_params_register(dl, otx2_cpt_dl_params, + ARRAY_SIZE(otx2_cpt_dl_params)); + if (ret) { + dev_err(dev, "devlink params register failed with error %d", + ret); + devlink_free(dl); + return ret; + } + + devlink_register(dl); + + return 0; +} + +void otx2_cpt_unregister_dl(struct otx2_cptpf_dev *cptpf) +{ + struct devlink *dl = cptpf->dl; + + if (!dl) + return; + + devlink_unregister(dl); + devlink_params_unregister(dl, otx2_cpt_dl_params, + ARRAY_SIZE(otx2_cpt_dl_params)); + devlink_free(dl); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.h new file mode 100644 index 0000000000..8b7d88c5d5 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2021 Marvell. + */ + +#ifndef __OTX2_CPT_DEVLINK_H +#define __OTX2_CPT_DEVLINK_H + +#include "otx2_cpt_common.h" +#include "otx2_cptpf.h" + +struct otx2_cpt_devlink { + struct devlink *dl; + struct otx2_cptpf_dev *cptpf; +}; + +/* Devlink APIs */ +int otx2_cpt_register_dl(struct otx2_cptpf_dev *cptpf); +void otx2_cpt_unregister_dl(struct otx2_cptpf_dev *cptpf); + +#endif /* __OTX2_CPT_DEVLINK_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h new file mode 100644 index 0000000000..6f947978e4 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h @@ -0,0 +1,476 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPT_HW_TYPES_H +#define __OTX2_CPT_HW_TYPES_H + +#include <linux/types.h> + +/* Device IDs */ +#define OTX2_CPT_PCI_PF_DEVICE_ID 0xA0FD +#define OTX2_CPT_PCI_VF_DEVICE_ID 0xA0FE +#define CN10K_CPT_PCI_PF_DEVICE_ID 0xA0F2 +#define CN10K_CPT_PCI_VF_DEVICE_ID 0xA0F3 + +/* Mailbox interrupts offset */ +#define OTX2_CPT_PF_MBOX_INT 6 +#define OTX2_CPT_PF_INT_VEC_E_MBOXX(x, a) ((x) + (a)) + +/* Maximum supported microcode groups */ +#define OTX2_CPT_MAX_ENGINE_GROUPS 8 + +/* CPT instruction size in bytes */ +#define OTX2_CPT_INST_SIZE 64 +/* + * CPT VF MSIX vectors and their offsets + */ +#define OTX2_CPT_VF_MSIX_VECTORS 1 +#define OTX2_CPT_VF_INTR_MBOX_MASK BIT(0) +#define CN10K_CPT_VF_MBOX_REGION (0xC0000) + +/* CPT LF MSIX vectors */ +#define OTX2_CPT_LF_MSIX_VECTORS 2 + +/* OcteonTX2 CPT PF registers */ +#define OTX2_CPT_PF_CONSTANTS (0x0) +#define OTX2_CPT_PF_RESET (0x100) +#define OTX2_CPT_PF_DIAG (0x120) +#define OTX2_CPT_PF_BIST_STATUS (0x160) +#define OTX2_CPT_PF_ECC0_CTL (0x200) +#define OTX2_CPT_PF_ECC0_FLIP (0x210) +#define OTX2_CPT_PF_ECC0_INT (0x220) +#define OTX2_CPT_PF_ECC0_INT_W1S (0x230) +#define OTX2_CPT_PF_ECC0_ENA_W1S (0x240) +#define OTX2_CPT_PF_ECC0_ENA_W1C (0x250) +#define OTX2_CPT_PF_MBOX_INTX(b) (0x400 | (b) << 3) +#define OTX2_CPT_PF_MBOX_INT_W1SX(b) (0x420 | (b) << 3) +#define OTX2_CPT_PF_MBOX_ENA_W1CX(b) (0x440 | (b) << 3) +#define OTX2_CPT_PF_MBOX_ENA_W1SX(b) (0x460 | (b) << 3) +#define OTX2_CPT_PF_EXEC_INT (0x500) +#define OTX2_CPT_PF_EXEC_INT_W1S (0x520) +#define OTX2_CPT_PF_EXEC_ENA_W1C (0x540) +#define OTX2_CPT_PF_EXEC_ENA_W1S (0x560) +#define OTX2_CPT_PF_GX_EN(b) (0x600 | (b) << 3) +#define OTX2_CPT_PF_EXEC_INFO (0x700) +#define OTX2_CPT_PF_EXEC_BUSY (0x800) +#define OTX2_CPT_PF_EXEC_INFO0 (0x900) +#define OTX2_CPT_PF_EXEC_INFO1 (0x910) +#define OTX2_CPT_PF_INST_REQ_PC (0x10000) +#define OTX2_CPT_PF_INST_LATENCY_PC (0x10020) +#define OTX2_CPT_PF_RD_REQ_PC (0x10040) +#define OTX2_CPT_PF_RD_LATENCY_PC (0x10060) +#define OTX2_CPT_PF_RD_UC_PC (0x10080) +#define OTX2_CPT_PF_ACTIVE_CYCLES_PC (0x10100) +#define OTX2_CPT_PF_EXE_CTL (0x4000000) +#define OTX2_CPT_PF_EXE_STATUS (0x4000008) +#define OTX2_CPT_PF_EXE_CLK (0x4000010) +#define OTX2_CPT_PF_EXE_DBG_CTL (0x4000018) +#define OTX2_CPT_PF_EXE_DBG_DATA (0x4000020) +#define OTX2_CPT_PF_EXE_BIST_STATUS (0x4000028) +#define OTX2_CPT_PF_EXE_REQ_TIMER (0x4000030) +#define OTX2_CPT_PF_EXE_MEM_CTL (0x4000038) +#define OTX2_CPT_PF_EXE_PERF_CTL (0x4001000) +#define OTX2_CPT_PF_EXE_DBG_CNTX(b) (0x4001100 | (b) << 3) +#define OTX2_CPT_PF_EXE_PERF_EVENT_CNT (0x4001180) +#define OTX2_CPT_PF_EXE_EPCI_INBX_CNT(b) (0x4001200 | (b) << 3) +#define OTX2_CPT_PF_EXE_EPCI_OUTBX_CNT(b) (0x4001240 | (b) << 3) +#define OTX2_CPT_PF_ENGX_UCODE_BASE(b) (0x4002000 | (b) << 3) +#define OTX2_CPT_PF_QX_CTL(b) (0x8000000 | (b) << 20) +#define OTX2_CPT_PF_QX_GMCTL(b) (0x8000020 | (b) << 20) +#define OTX2_CPT_PF_QX_CTL2(b) (0x8000100 | (b) << 20) +#define OTX2_CPT_PF_VFX_MBOXX(b, c) (0x8001000 | (b) << 20 | \ + (c) << 8) + +/* OcteonTX2 CPT LF registers */ +#define OTX2_CPT_LF_CTL (0x10) +#define OTX2_CPT_LF_DONE_WAIT (0x30) +#define OTX2_CPT_LF_INPROG (0x40) +#define OTX2_CPT_LF_DONE (0x50) +#define OTX2_CPT_LF_DONE_ACK (0x60) +#define OTX2_CPT_LF_DONE_INT_ENA_W1S (0x90) +#define OTX2_CPT_LF_DONE_INT_ENA_W1C (0xa0) +#define OTX2_CPT_LF_MISC_INT (0xb0) +#define OTX2_CPT_LF_MISC_INT_W1S (0xc0) +#define OTX2_CPT_LF_MISC_INT_ENA_W1S (0xd0) +#define OTX2_CPT_LF_MISC_INT_ENA_W1C (0xe0) +#define OTX2_CPT_LF_Q_BASE (0xf0) +#define OTX2_CPT_LF_Q_SIZE (0x100) +#define OTX2_CPT_LF_Q_INST_PTR (0x110) +#define OTX2_CPT_LF_Q_GRP_PTR (0x120) +#define OTX2_CPT_LF_NQX(a) (0x400 | (a) << 3) +#define OTX2_CPT_RVU_FUNC_BLKADDR_SHIFT 20 +/* LMT LF registers */ +#define OTX2_CPT_LMT_LFBASE BIT_ULL(OTX2_CPT_RVU_FUNC_BLKADDR_SHIFT) +#define OTX2_CPT_LMT_LF_LMTLINEX(a) (OTX2_CPT_LMT_LFBASE | 0x000 | \ + (a) << 12) +/* RVU VF registers */ +#define OTX2_RVU_VF_INT (0x20) +#define OTX2_RVU_VF_INT_W1S (0x28) +#define OTX2_RVU_VF_INT_ENA_W1S (0x30) +#define OTX2_RVU_VF_INT_ENA_W1C (0x38) + +/* + * Enumeration otx2_cpt_ucode_error_code_e + * + * Enumerates ucode errors + */ +enum otx2_cpt_ucode_comp_code_e { + OTX2_CPT_UCC_SUCCESS = 0x00, + OTX2_CPT_UCC_INVALID_OPCODE = 0x01, + + /* Scatter gather */ + OTX2_CPT_UCC_SG_WRITE_LENGTH = 0x02, + OTX2_CPT_UCC_SG_LIST = 0x03, + OTX2_CPT_UCC_SG_NOT_SUPPORTED = 0x04, + +}; + +/* + * Enumeration otx2_cpt_comp_e + * + * OcteonTX2 CPT Completion Enumeration + * Enumerates the values of CPT_RES_S[COMPCODE]. + */ +enum otx2_cpt_comp_e { + OTX2_CPT_COMP_E_NOTDONE = 0x00, + OTX2_CPT_COMP_E_GOOD = 0x01, + OTX2_CPT_COMP_E_FAULT = 0x02, + OTX2_CPT_COMP_E_HWERR = 0x04, + OTX2_CPT_COMP_E_INSTERR = 0x05, + OTX2_CPT_COMP_E_WARN = 0x06 +}; + +/* + * Enumeration otx2_cpt_vf_int_vec_e + * + * OcteonTX2 CPT VF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx2_cpt_vf_int_vec_e { + OTX2_CPT_VF_INT_VEC_E_MBOX = 0x00 +}; + +/* + * Enumeration otx2_cpt_lf_int_vec_e + * + * OcteonTX2 CPT LF MSI-X Vector Enumeration + * Enumerates the MSI-X interrupt vectors. + */ +enum otx2_cpt_lf_int_vec_e { + OTX2_CPT_LF_INT_VEC_E_MISC = 0x00, + OTX2_CPT_LF_INT_VEC_E_DONE = 0x01 +}; + +/* + * Structure otx2_cpt_inst_s + * + * CPT Instruction Structure + * This structure specifies the instruction layout. Instructions are + * stored in memory as little-endian unless CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_inst_s_s + * Word 0 + * doneint:1 Done interrupt. + * 0 = No interrupts related to this instruction. + * 1 = When the instruction completes, CPT()_VQ()_DONE[DONE] will be + * incremented,and based on the rules described there an interrupt may + * occur. + * Word 1 + * res_addr [127: 64] Result IOVA. + * If nonzero, specifies where to write CPT_RES_S. + * If zero, no result structure will be written. + * Address must be 16-byte aligned. + * Bits <63:49> are ignored by hardware; software should use a + * sign-extended bit <48> for forward compatibility. + * Word 2 + * grp:10 [171:162] If [WQ_PTR] is nonzero, the SSO guest-group to use when + * CPT submits work SSO. + * For the SSO to not discard the add-work request, FPA_PF_MAP() must map + * [GRP] and CPT()_PF_Q()_GMCTL[GMID] as valid. + * tt:2 [161:160] If [WQ_PTR] is nonzero, the SSO tag type to use when CPT + * submits work to SSO + * tag:32 [159:128] If [WQ_PTR] is nonzero, the SSO tag to use when CPT + * submits work to SSO. + * Word 3 + * wq_ptr [255:192] If [WQ_PTR] is nonzero, it is a pointer to a + * work-queue entry that CPT submits work to SSO after all context, + * output data, and result write operations are visible to other + * CNXXXX units and the cores. Bits <2:0> must be zero. + * Bits <63:49> are ignored by hardware; software should + * use a sign-extended bit <48> for forward compatibility. + * Internal: + * Bits <63:49>, <2:0> are ignored by hardware, treated as always 0x0. + * Word 4 + * ei0; [319:256] Engine instruction word 0. Passed to the AE/SE. + * Word 5 + * ei1; [383:320] Engine instruction word 1. Passed to the AE/SE. + * Word 6 + * ei2; [447:384] Engine instruction word 1. Passed to the AE/SE. + * Word 7 + * ei3; [511:448] Engine instruction word 1. Passed to the AE/SE. + * + */ +union otx2_cpt_inst_s { + u64 u[8]; + + struct { + /* Word 0 */ + u64 nixtxl:3; + u64 doneint:1; + u64 nixtx_addr:60; + /* Word 1 */ + u64 res_addr; + /* Word 2 */ + u64 tag:32; + u64 tt:2; + u64 grp:10; + u64 reserved_172_175:4; + u64 rvu_pf_func:16; + /* Word 3 */ + u64 qord:1; + u64 reserved_194_193:2; + u64 wq_ptr:61; + /* Word 4 */ + u64 ei0; + /* Word 5 */ + u64 ei1; + /* Word 6 */ + u64 ei2; + /* Word 7 */ + u64 ei3; + } s; +}; + +/* + * Structure otx2_cpt_res_s + * + * CPT Result Structure + * The CPT coprocessor writes the result structure after it completes a + * CPT_INST_S instruction. The result structure is exactly 16 bytes, and + * each instruction completion produces exactly one result structure. + * + * This structure is stored in memory as little-endian unless + * CPT()_PF_Q()_CTL[INST_BE] is set. + * cpt_res_s_s + * Word 0 + * doneint:1 [16:16] Done interrupt. This bit is copied from the + * corresponding instruction's CPT_INST_S[DONEINT]. + * compcode:8 [7:0] Indicates completion/error status of the CPT coprocessor + * for the associated instruction, as enumerated by CPT_COMP_E. + * Core software may write the memory location containing [COMPCODE] to + * 0x0 before ringing the doorbell, and then poll for completion by + * checking for a nonzero value. + * Once the core observes a nonzero [COMPCODE] value in this case,the CPT + * coprocessor will have also completed L2/DRAM write operations. + * Word 1 + * reserved + * + */ +union otx2_cpt_res_s { + u64 u[2]; + + struct cn9k_cpt_res_s { + u64 compcode:8; + u64 uc_compcode:8; + u64 doneint:1; + u64 reserved_17_63:47; + u64 reserved_64_127; + } s; + + struct cn10k_cpt_res_s { + u64 compcode:7; + u64 doneint:1; + u64 uc_compcode:8; + u64 rlen:16; + u64 spi:32; + u64 esn; + } cn10k; +}; + +/* + * Register (RVU_PF_BAR0) cpt#_af_constants1 + * + * CPT AF Constants Register + * This register contains implementation-related parameters of CPT. + */ +union otx2_cptx_af_constants1 { + u64 u; + struct otx2_cptx_af_constants1_s { + u64 se:16; + u64 ie:16; + u64 ae:16; + u64 reserved_48_63:16; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_misc_int + * + * This register contain the per-queue miscellaneous interrupts. + * + */ +union otx2_cptx_lf_misc_int { + u64 u; + struct otx2_cptx_lf_misc_int_s { + u64 reserved_0:1; + u64 nqerr:1; + u64 irde:1; + u64 nwrp:1; + u64 reserved_4:1; + u64 hwerr:1; + u64 fault:1; + u64 reserved_7_63:57; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_misc_int_ena_w1s + * + * This register sets interrupt enable bits. + * + */ +union otx2_cptx_lf_misc_int_ena_w1s { + u64 u; + struct otx2_cptx_lf_misc_int_ena_w1s_s { + u64 reserved_0:1; + u64 nqerr:1; + u64 irde:1; + u64 nwrp:1; + u64 reserved_4:1; + u64 hwerr:1; + u64 fault:1; + u64 reserved_7_63:57; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_ctl + * + * This register configures the queue. + * + * When the queue is not execution-quiescent (see CPT_LF_INPROG[EENA,INFLIGHT]), + * software must only write this register with [ENA]=0. + */ +union otx2_cptx_lf_ctl { + u64 u; + struct otx2_cptx_lf_ctl_s { + u64 ena:1; + u64 fc_ena:1; + u64 fc_up_crossing:1; + u64 reserved_3:1; + u64 fc_hyst_bits:4; + u64 reserved_8_63:56; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_done_wait + * + * This register specifies the per-queue interrupt coalescing settings. + */ +union otx2_cptx_lf_done_wait { + u64 u; + struct otx2_cptx_lf_done_wait_s { + u64 num_wait:20; + u64 reserved_20_31:12; + u64 time_wait:16; + u64 reserved_48_63:16; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_done + * + * This register contain the per-queue instruction done count. + */ +union otx2_cptx_lf_done { + u64 u; + struct otx2_cptx_lf_done_s { + u64 done:20; + u64 reserved_20_63:44; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_inprog + * + * These registers contain the per-queue instruction in flight registers. + * + */ +union otx2_cptx_lf_inprog { + u64 u; + struct otx2_cptx_lf_inprog_s { + u64 inflight:9; + u64 reserved_9_15:7; + u64 eena:1; + u64 grp_drp:1; + u64 reserved_18_30:13; + u64 grb_partial:1; + u64 grb_cnt:8; + u64 gwb_cnt:8; + u64 reserved_48_63:16; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_q_base + * + * CPT initializes these CSR fields to these values on any CPT_LF_Q_BASE write: + * _ CPT_LF_Q_INST_PTR[XQ_XOR]=0. + * _ CPT_LF_Q_INST_PTR[NQ_PTR]=2. + * _ CPT_LF_Q_INST_PTR[DQ_PTR]=2. + * _ CPT_LF_Q_GRP_PTR[XQ_XOR]=0. + * _ CPT_LF_Q_GRP_PTR[NQ_PTR]=1. + * _ CPT_LF_Q_GRP_PTR[DQ_PTR]=1. + */ +union otx2_cptx_lf_q_base { + u64 u; + struct otx2_cptx_lf_q_base_s { + u64 fault:1; + u64 reserved_1_6:6; + u64 addr:46; + u64 reserved_53_63:11; + } s; +}; + +/* + * RVU_PFVF_BAR2 - cpt_lf_q_size + * + * CPT initializes these CSR fields to these values on any CPT_LF_Q_SIZE write: + * _ CPT_LF_Q_INST_PTR[XQ_XOR]=0. + * _ CPT_LF_Q_INST_PTR[NQ_PTR]=2. + * _ CPT_LF_Q_INST_PTR[DQ_PTR]=2. + * _ CPT_LF_Q_GRP_PTR[XQ_XOR]=0. + * _ CPT_LF_Q_GRP_PTR[NQ_PTR]=1. + * _ CPT_LF_Q_GRP_PTR[DQ_PTR]=1. + */ +union otx2_cptx_lf_q_size { + u64 u; + struct otx2_cptx_lf_q_size_s { + u64 size_div40:15; + u64 reserved_15_63:49; + } s; +}; + +/* + * RVU_PF_BAR0 - cpt_af_lf_ctl + * + * This register configures queues. This register should be written only + * when the queue is execution-quiescent (see CPT_LF_INPROG[INFLIGHT]). + */ +union otx2_cptx_af_lf_ctrl { + u64 u; + struct otx2_cptx_af_lf_ctrl_s { + u64 pri:1; + u64 reserved_1_8:8; + u64 pf_func_inst:1; + u64 cont_err:1; + u64 reserved_11_15:5; + u64 nixtx_en:1; + u64 reserved_17_47:31; + u64 grp:8; + u64 reserved_56_63:8; + } s; +}; + +#endif /* __OTX2_CPT_HW_TYPES_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c new file mode 100644 index 0000000000..273ee5352a --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_mbox_common.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptlf.h" + +int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) +{ + int ret; + + otx2_mbox_msg_send(mbox, 0); + ret = otx2_mbox_wait_for_rsp(mbox, 0); + if (ret == -EIO) { + dev_err(&pdev->dev, "RVU MBOX timeout.\n"); + return ret; + } else if (ret) { + dev_err(&pdev->dev, "RVU MBOX error: %d.\n", ret); + return -EFAULT; + } + return ret; +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_mbox_msg, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev) +{ + struct mbox_msghdr *req; + + req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct ready_msg_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + req->id = MBOX_MSG_READY; + req->sig = OTX2_MBOX_REQ_SIG; + req->pcifunc = 0; + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_ready_msg, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cpt_send_af_reg_requests(struct otx2_mbox *mbox, struct pci_dev *pdev) +{ + return otx2_cpt_send_mbox_msg(mbox, pdev); +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_send_af_reg_requests, CRYPTO_DEV_OCTEONTX2_CPT); + +static int otx2_cpt_add_read_af_reg(struct otx2_mbox *mbox, + struct pci_dev *pdev, u64 reg, + u64 *val, int blkaddr) +{ + struct cpt_rd_wr_reg_msg *reg_msg; + + reg_msg = (struct cpt_rd_wr_reg_msg *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*reg_msg), + sizeof(*reg_msg)); + if (reg_msg == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + reg_msg->hdr.id = MBOX_MSG_CPT_RD_WR_REGISTER; + reg_msg->hdr.sig = OTX2_MBOX_REQ_SIG; + reg_msg->hdr.pcifunc = 0; + + reg_msg->is_write = 0; + reg_msg->reg_offset = reg; + reg_msg->ret_val = val; + reg_msg->blkaddr = blkaddr; + + return 0; +} + +int otx2_cpt_add_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 val, int blkaddr) +{ + struct cpt_rd_wr_reg_msg *reg_msg; + + reg_msg = (struct cpt_rd_wr_reg_msg *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*reg_msg), + sizeof(*reg_msg)); + if (reg_msg == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + reg_msg->hdr.id = MBOX_MSG_CPT_RD_WR_REGISTER; + reg_msg->hdr.sig = OTX2_MBOX_REQ_SIG; + reg_msg->hdr.pcifunc = 0; + + reg_msg->is_write = 1; + reg_msg->reg_offset = reg; + reg_msg->val = val; + reg_msg->blkaddr = blkaddr; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_add_write_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cpt_read_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 *val, int blkaddr) +{ + int ret; + + ret = otx2_cpt_add_read_af_reg(mbox, pdev, reg, val, blkaddr); + if (ret) + return ret; + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_read_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cpt_write_af_reg(struct otx2_mbox *mbox, struct pci_dev *pdev, + u64 reg, u64 val, int blkaddr) +{ + int ret; + + ret = otx2_cpt_add_write_af_reg(mbox, pdev, reg, val, blkaddr); + if (ret) + return ret; + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_write_af_reg, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cpt_attach_rscrs_msg(struct otx2_cptlfs_info *lfs) +{ + struct otx2_mbox *mbox = lfs->mbox; + struct rsrc_attach *req; + int ret; + + req = (struct rsrc_attach *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msg_rsp)); + if (req == NULL) { + dev_err(&lfs->pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + req->hdr.id = MBOX_MSG_ATTACH_RESOURCES; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = 0; + req->cptlfs = lfs->lfs_num; + req->cpt_blkaddr = lfs->blkaddr; + req->modify = 1; + ret = otx2_cpt_send_mbox_msg(mbox, lfs->pdev); + if (ret) + return ret; + + if (!lfs->are_lfs_attached) + ret = -EINVAL; + + return ret; +} + +int otx2_cpt_detach_rsrcs_msg(struct otx2_cptlfs_info *lfs) +{ + struct otx2_mbox *mbox = lfs->mbox; + struct rsrc_detach *req; + int ret; + + req = (struct rsrc_detach *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msg_rsp)); + if (req == NULL) { + dev_err(&lfs->pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + req->hdr.id = MBOX_MSG_DETACH_RESOURCES; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = 0; + req->cptlfs = 1; + ret = otx2_cpt_send_mbox_msg(mbox, lfs->pdev); + if (ret) + return ret; + + if (lfs->are_lfs_attached) + ret = -EINVAL; + + return ret; +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_detach_rsrcs_msg, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cpt_msix_offset_msg(struct otx2_cptlfs_info *lfs) +{ + struct otx2_mbox *mbox = lfs->mbox; + struct pci_dev *pdev = lfs->pdev; + struct mbox_msghdr *req; + int ret, i; + + req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msix_offset_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + + req->id = MBOX_MSG_MSIX_OFFSET; + req->sig = OTX2_MBOX_REQ_SIG; + req->pcifunc = 0; + ret = otx2_cpt_send_mbox_msg(mbox, pdev); + if (ret) + return ret; + + for (i = 0; i < lfs->lfs_num; i++) { + if (lfs->lf[i].msix_offset == MSIX_VECTOR_INVALID) { + dev_err(&pdev->dev, + "Invalid msix offset %d for LF %d\n", + lfs->lf[i].msix_offset, i); + return -EINVAL; + } + } + return ret; +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_msix_offset_msg, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cpt_sync_mbox_msg(struct otx2_mbox *mbox) +{ + int err; + + if (!otx2_mbox_nonempty(mbox, 0)) + return 0; + otx2_mbox_msg_send(mbox, 0); + err = otx2_mbox_wait_for_rsp(mbox, 0); + if (err) + return err; + + return otx2_mbox_check_rsp_msgs(mbox, 0); +} +EXPORT_SYMBOL_NS_GPL(otx2_cpt_sync_mbox_msg, CRYPTO_DEV_OCTEONTX2_CPT); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h new file mode 100644 index 0000000000..dbb1ee746f --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_reqmgr.h @@ -0,0 +1,197 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPT_REQMGR_H +#define __OTX2_CPT_REQMGR_H + +#include "otx2_cpt_common.h" + +/* Completion code size and initial value */ +#define OTX2_CPT_COMPLETION_CODE_SIZE 8 +#define OTX2_CPT_COMPLETION_CODE_INIT OTX2_CPT_COMP_E_NOTDONE +/* + * Maximum total number of SG buffers is 100, we divide it equally + * between input and output + */ +#define OTX2_CPT_MAX_SG_IN_CNT 50 +#define OTX2_CPT_MAX_SG_OUT_CNT 50 + +/* DMA mode direct or SG */ +#define OTX2_CPT_DMA_MODE_DIRECT 0 +#define OTX2_CPT_DMA_MODE_SG 1 + +/* Context source CPTR or DPTR */ +#define OTX2_CPT_FROM_CPTR 0 +#define OTX2_CPT_FROM_DPTR 1 + +#define OTX2_CPT_MAX_REQ_SIZE 65535 + +union otx2_cpt_opcode { + u16 flags; + struct { + u8 major; + u8 minor; + } s; +}; + +struct otx2_cptvf_request { + u32 param1; + u32 param2; + u16 dlen; + union otx2_cpt_opcode opcode; +}; + +/* + * CPT_INST_S software command definitions + * Words EI (0-3) + */ +union otx2_cpt_iq_cmd_word0 { + u64 u; + struct { + __be16 opcode; + __be16 param1; + __be16 param2; + __be16 dlen; + } s; +}; + +union otx2_cpt_iq_cmd_word3 { + u64 u; + struct { + u64 cptr:61; + u64 grp:3; + } s; +}; + +struct otx2_cpt_iq_command { + union otx2_cpt_iq_cmd_word0 cmd; + u64 dptr; + u64 rptr; + union otx2_cpt_iq_cmd_word3 cptr; +}; + +struct otx2_cpt_pending_entry { + void *completion_addr; /* Completion address */ + void *info; + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + u8 resume_sender; /* Notify sender to resume sending requests */ + u8 busy; /* Entry status (free/busy) */ +}; + +struct otx2_cpt_pending_queue { + struct otx2_cpt_pending_entry *head; /* Head of the queue */ + u32 front; /* Process work from here */ + u32 rear; /* Append new work here */ + u32 pending_count; /* Pending requests count */ + u32 qlen; /* Queue length */ + spinlock_t lock; /* Queue lock */ +}; + +struct otx2_cpt_buf_ptr { + u8 *vptr; + dma_addr_t dma_addr; + u16 size; +}; + +union otx2_cpt_ctrl_info { + u32 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u32 reserved_6_31:26; + u32 grp:3; /* Group bits */ + u32 dma_mode:2; /* DMA mode */ + u32 se_req:1; /* To SE core */ +#else + u32 se_req:1; /* To SE core */ + u32 dma_mode:2; /* DMA mode */ + u32 grp:3; /* Group bits */ + u32 reserved_6_31:26; +#endif + } s; +}; + +struct otx2_cpt_req_info { + /* Kernel async request callback */ + void (*callback)(int status, void *arg1, void *arg2); + struct crypto_async_request *areq; /* Async request callback arg */ + struct otx2_cptvf_request req;/* Request information (core specific) */ + union otx2_cpt_ctrl_info ctrl;/* User control information */ + struct otx2_cpt_buf_ptr in[OTX2_CPT_MAX_SG_IN_CNT]; + struct otx2_cpt_buf_ptr out[OTX2_CPT_MAX_SG_OUT_CNT]; + u8 *iv_out; /* IV to send back */ + u16 rlen; /* Output length */ + u8 in_cnt; /* Number of input buffers */ + u8 out_cnt; /* Number of output buffers */ + u8 req_type; /* Type of request */ + u8 is_enc; /* Is a request an encryption request */ + u8 is_trunc_hmac;/* Is truncated hmac used */ +}; + +struct otx2_cpt_inst_info { + struct otx2_cpt_pending_entry *pentry; + struct otx2_cpt_req_info *req; + struct pci_dev *pdev; + void *completion_addr; + u8 *out_buffer; + u8 *in_buffer; + dma_addr_t dptr_baddr; + dma_addr_t rptr_baddr; + dma_addr_t comp_baddr; + unsigned long time_in; + u32 dlen; + u32 dma_len; + u8 extra_time; +}; + +struct otx2_cpt_sglist_component { + __be16 len0; + __be16 len1; + __be16 len2; + __be16 len3; + __be64 ptr0; + __be64 ptr1; + __be64 ptr2; + __be64 ptr3; +}; + +static inline void otx2_cpt_info_destroy(struct pci_dev *pdev, + struct otx2_cpt_inst_info *info) +{ + struct otx2_cpt_req_info *req; + int i; + + if (info->dptr_baddr) + dma_unmap_single(&pdev->dev, info->dptr_baddr, + info->dma_len, DMA_BIDIRECTIONAL); + + if (info->req) { + req = info->req; + for (i = 0; i < req->out_cnt; i++) { + if (req->out[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->out[i].dma_addr, + req->out[i].size, + DMA_BIDIRECTIONAL); + } + + for (i = 0; i < req->in_cnt; i++) { + if (req->in[i].dma_addr) + dma_unmap_single(&pdev->dev, + req->in[i].dma_addr, + req->in[i].size, + DMA_BIDIRECTIONAL); + } + } + kfree(info); +} + +struct otx2_cptlf_wqe; +int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, + int cpu_num); +void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe); +int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev); + +#endif /* __OTX2_CPT_REQMGR_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c new file mode 100644 index 0000000000..6edd27ff8c --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c @@ -0,0 +1,444 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptlf.h" +#include "rvu_reg.h" + +#define CPT_TIMER_HOLD 0x03F +#define CPT_COUNT_HOLD 32 + +static void cptlf_do_set_done_time_wait(struct otx2_cptlf_info *lf, + int time_wait) +{ + union otx2_cptx_lf_done_wait done_wait; + + done_wait.u = otx2_cpt_read64(lf->lfs->reg_base, lf->lfs->blkaddr, + lf->slot, OTX2_CPT_LF_DONE_WAIT); + done_wait.s.time_wait = time_wait; + otx2_cpt_write64(lf->lfs->reg_base, lf->lfs->blkaddr, lf->slot, + OTX2_CPT_LF_DONE_WAIT, done_wait.u); +} + +static void cptlf_do_set_done_num_wait(struct otx2_cptlf_info *lf, int num_wait) +{ + union otx2_cptx_lf_done_wait done_wait; + + done_wait.u = otx2_cpt_read64(lf->lfs->reg_base, lf->lfs->blkaddr, + lf->slot, OTX2_CPT_LF_DONE_WAIT); + done_wait.s.num_wait = num_wait; + otx2_cpt_write64(lf->lfs->reg_base, lf->lfs->blkaddr, lf->slot, + OTX2_CPT_LF_DONE_WAIT, done_wait.u); +} + +static void cptlf_set_done_time_wait(struct otx2_cptlfs_info *lfs, + int time_wait) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + cptlf_do_set_done_time_wait(&lfs->lf[slot], time_wait); +} + +static void cptlf_set_done_num_wait(struct otx2_cptlfs_info *lfs, int num_wait) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + cptlf_do_set_done_num_wait(&lfs->lf[slot], num_wait); +} + +static int cptlf_set_pri(struct otx2_cptlf_info *lf, int pri) +{ + struct otx2_cptlfs_info *lfs = lf->lfs; + union otx2_cptx_af_lf_ctrl lf_ctrl; + int ret; + + ret = otx2_cpt_read_af_reg(lfs->mbox, lfs->pdev, + CPT_AF_LFX_CTL(lf->slot), + &lf_ctrl.u, lfs->blkaddr); + if (ret) + return ret; + + lf_ctrl.s.pri = pri ? 1 : 0; + + ret = otx2_cpt_write_af_reg(lfs->mbox, lfs->pdev, + CPT_AF_LFX_CTL(lf->slot), + lf_ctrl.u, lfs->blkaddr); + return ret; +} + +static int cptlf_set_eng_grps_mask(struct otx2_cptlf_info *lf, + int eng_grps_mask) +{ + struct otx2_cptlfs_info *lfs = lf->lfs; + union otx2_cptx_af_lf_ctrl lf_ctrl; + int ret; + + ret = otx2_cpt_read_af_reg(lfs->mbox, lfs->pdev, + CPT_AF_LFX_CTL(lf->slot), + &lf_ctrl.u, lfs->blkaddr); + if (ret) + return ret; + + lf_ctrl.s.grp = eng_grps_mask; + + ret = otx2_cpt_write_af_reg(lfs->mbox, lfs->pdev, + CPT_AF_LFX_CTL(lf->slot), + lf_ctrl.u, lfs->blkaddr); + return ret; +} + +static int cptlf_set_grp_and_pri(struct otx2_cptlfs_info *lfs, + int eng_grp_mask, int pri) +{ + int slot, ret = 0; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + ret = cptlf_set_pri(&lfs->lf[slot], pri); + if (ret) + return ret; + + ret = cptlf_set_eng_grps_mask(&lfs->lf[slot], eng_grp_mask); + if (ret) + return ret; + } + return ret; +} + +static void cptlf_hw_init(struct otx2_cptlfs_info *lfs) +{ + /* Disable instruction queues */ + otx2_cptlf_disable_iqueues(lfs); + + /* Set instruction queues base addresses */ + otx2_cptlf_set_iqueues_base_addr(lfs); + + /* Set instruction queues sizes */ + otx2_cptlf_set_iqueues_size(lfs); + + /* Set done interrupts time wait */ + cptlf_set_done_time_wait(lfs, CPT_TIMER_HOLD); + + /* Set done interrupts num wait */ + cptlf_set_done_num_wait(lfs, CPT_COUNT_HOLD); + + /* Enable instruction queues */ + otx2_cptlf_enable_iqueues(lfs); +} + +static void cptlf_hw_cleanup(struct otx2_cptlfs_info *lfs) +{ + /* Disable instruction queues */ + otx2_cptlf_disable_iqueues(lfs); +} + +static void cptlf_set_misc_intrs(struct otx2_cptlfs_info *lfs, u8 enable) +{ + union otx2_cptx_lf_misc_int_ena_w1s irq_misc = { .u = 0x0 }; + u64 reg = enable ? OTX2_CPT_LF_MISC_INT_ENA_W1S : + OTX2_CPT_LF_MISC_INT_ENA_W1C; + int slot; + + irq_misc.s.fault = 0x1; + irq_misc.s.hwerr = 0x1; + irq_misc.s.irde = 0x1; + irq_misc.s.nqerr = 0x1; + irq_misc.s.nwrp = 0x1; + + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cpt_write64(lfs->reg_base, lfs->blkaddr, slot, reg, + irq_misc.u); +} + +static void cptlf_enable_intrs(struct otx2_cptlfs_info *lfs) +{ + int slot; + + /* Enable done interrupts */ + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cpt_write64(lfs->reg_base, lfs->blkaddr, slot, + OTX2_CPT_LF_DONE_INT_ENA_W1S, 0x1); + /* Enable Misc interrupts */ + cptlf_set_misc_intrs(lfs, true); +} + +static void cptlf_disable_intrs(struct otx2_cptlfs_info *lfs) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cpt_write64(lfs->reg_base, lfs->blkaddr, slot, + OTX2_CPT_LF_DONE_INT_ENA_W1C, 0x1); + cptlf_set_misc_intrs(lfs, false); +} + +static inline int cptlf_read_done_cnt(struct otx2_cptlf_info *lf) +{ + union otx2_cptx_lf_done irq_cnt; + + irq_cnt.u = otx2_cpt_read64(lf->lfs->reg_base, lf->lfs->blkaddr, lf->slot, + OTX2_CPT_LF_DONE); + return irq_cnt.s.done; +} + +static irqreturn_t cptlf_misc_intr_handler(int __always_unused irq, void *arg) +{ + union otx2_cptx_lf_misc_int irq_misc, irq_misc_ack; + struct otx2_cptlf_info *lf = arg; + struct device *dev; + + dev = &lf->lfs->pdev->dev; + irq_misc.u = otx2_cpt_read64(lf->lfs->reg_base, lf->lfs->blkaddr, + lf->slot, OTX2_CPT_LF_MISC_INT); + irq_misc_ack.u = 0x0; + + if (irq_misc.s.fault) { + dev_err(dev, "Memory error detected while executing CPT_INST_S, LF %d.\n", + lf->slot); + irq_misc_ack.s.fault = 0x1; + + } else if (irq_misc.s.hwerr) { + dev_err(dev, "HW error from an engine executing CPT_INST_S, LF %d.", + lf->slot); + irq_misc_ack.s.hwerr = 0x1; + + } else if (irq_misc.s.nwrp) { + dev_err(dev, "SMMU fault while writing CPT_RES_S to CPT_INST_S[RES_ADDR], LF %d.\n", + lf->slot); + irq_misc_ack.s.nwrp = 0x1; + + } else if (irq_misc.s.irde) { + dev_err(dev, "Memory error when accessing instruction memory queue CPT_LF_Q_BASE[ADDR].\n"); + irq_misc_ack.s.irde = 0x1; + + } else if (irq_misc.s.nqerr) { + dev_err(dev, "Error enqueuing an instruction received at CPT_LF_NQ.\n"); + irq_misc_ack.s.nqerr = 0x1; + + } else { + dev_err(dev, "Unhandled interrupt in CPT LF %d\n", lf->slot); + return IRQ_NONE; + } + + /* Acknowledge interrupts */ + otx2_cpt_write64(lf->lfs->reg_base, lf->lfs->blkaddr, lf->slot, + OTX2_CPT_LF_MISC_INT, irq_misc_ack.u); + + return IRQ_HANDLED; +} + +static irqreturn_t cptlf_done_intr_handler(int irq, void *arg) +{ + union otx2_cptx_lf_done_wait done_wait; + struct otx2_cptlf_info *lf = arg; + int irq_cnt; + + /* Read the number of completed requests */ + irq_cnt = cptlf_read_done_cnt(lf); + if (irq_cnt) { + done_wait.u = otx2_cpt_read64(lf->lfs->reg_base, lf->lfs->blkaddr, + lf->slot, OTX2_CPT_LF_DONE_WAIT); + /* Acknowledge the number of completed requests */ + otx2_cpt_write64(lf->lfs->reg_base, lf->lfs->blkaddr, lf->slot, + OTX2_CPT_LF_DONE_ACK, irq_cnt); + + otx2_cpt_write64(lf->lfs->reg_base, lf->lfs->blkaddr, lf->slot, + OTX2_CPT_LF_DONE_WAIT, done_wait.u); + if (unlikely(!lf->wqe)) { + dev_err(&lf->lfs->pdev->dev, "No work for LF %d\n", + lf->slot); + return IRQ_NONE; + } + + /* Schedule processing of completed requests */ + tasklet_hi_schedule(&lf->wqe->work); + } + return IRQ_HANDLED; +} + +void otx2_cptlf_unregister_interrupts(struct otx2_cptlfs_info *lfs) +{ + int i, offs, vector; + + for (i = 0; i < lfs->lfs_num; i++) { + for (offs = 0; offs < OTX2_CPT_LF_MSIX_VECTORS; offs++) { + if (!lfs->lf[i].is_irq_reg[offs]) + continue; + + vector = pci_irq_vector(lfs->pdev, + lfs->lf[i].msix_offset + offs); + free_irq(vector, &lfs->lf[i]); + lfs->lf[i].is_irq_reg[offs] = false; + } + } + cptlf_disable_intrs(lfs); +} +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_unregister_interrupts, + CRYPTO_DEV_OCTEONTX2_CPT); + +static int cptlf_do_register_interrrupts(struct otx2_cptlfs_info *lfs, + int lf_num, int irq_offset, + irq_handler_t handler) +{ + int ret, vector; + + vector = pci_irq_vector(lfs->pdev, lfs->lf[lf_num].msix_offset + + irq_offset); + ret = request_irq(vector, handler, 0, + lfs->lf[lf_num].irq_name[irq_offset], + &lfs->lf[lf_num]); + if (ret) + return ret; + + lfs->lf[lf_num].is_irq_reg[irq_offset] = true; + + return ret; +} + +int otx2_cptlf_register_interrupts(struct otx2_cptlfs_info *lfs) +{ + int irq_offs, ret, i; + + for (i = 0; i < lfs->lfs_num; i++) { + irq_offs = OTX2_CPT_LF_INT_VEC_E_MISC; + snprintf(lfs->lf[i].irq_name[irq_offs], 32, "CPTLF Misc%d", i); + ret = cptlf_do_register_interrrupts(lfs, i, irq_offs, + cptlf_misc_intr_handler); + if (ret) + goto free_irq; + + irq_offs = OTX2_CPT_LF_INT_VEC_E_DONE; + snprintf(lfs->lf[i].irq_name[irq_offs], 32, "OTX2_CPTLF Done%d", + i); + ret = cptlf_do_register_interrrupts(lfs, i, irq_offs, + cptlf_done_intr_handler); + if (ret) + goto free_irq; + } + cptlf_enable_intrs(lfs); + return 0; + +free_irq: + otx2_cptlf_unregister_interrupts(lfs); + return ret; +} +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_register_interrupts, CRYPTO_DEV_OCTEONTX2_CPT); + +void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs) +{ + int slot, offs; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + for (offs = 0; offs < OTX2_CPT_LF_MSIX_VECTORS; offs++) + irq_set_affinity_hint(pci_irq_vector(lfs->pdev, + lfs->lf[slot].msix_offset + + offs), NULL); + free_cpumask_var(lfs->lf[slot].affinity_mask); + } +} +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_free_irqs_affinity, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cptlf_set_irqs_affinity(struct otx2_cptlfs_info *lfs) +{ + struct otx2_cptlf_info *lf = lfs->lf; + int slot, offs, ret; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + if (!zalloc_cpumask_var(&lf[slot].affinity_mask, GFP_KERNEL)) { + dev_err(&lfs->pdev->dev, + "cpumask allocation failed for LF %d", slot); + ret = -ENOMEM; + goto free_affinity_mask; + } + + cpumask_set_cpu(cpumask_local_spread(slot, + dev_to_node(&lfs->pdev->dev)), + lf[slot].affinity_mask); + + for (offs = 0; offs < OTX2_CPT_LF_MSIX_VECTORS; offs++) { + ret = irq_set_affinity_hint(pci_irq_vector(lfs->pdev, + lf[slot].msix_offset + offs), + lf[slot].affinity_mask); + if (ret) + goto free_affinity_mask; + } + } + return 0; + +free_affinity_mask: + otx2_cptlf_free_irqs_affinity(lfs); + return ret; +} +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_set_irqs_affinity, CRYPTO_DEV_OCTEONTX2_CPT); + +int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri, + int lfs_num) +{ + int slot, ret; + + if (!lfs->pdev || !lfs->reg_base) + return -EINVAL; + + lfs->lfs_num = lfs_num; + for (slot = 0; slot < lfs->lfs_num; slot++) { + lfs->lf[slot].lfs = lfs; + lfs->lf[slot].slot = slot; + if (lfs->lmt_base) + lfs->lf[slot].lmtline = lfs->lmt_base + + (slot * LMTLINE_SIZE); + else + lfs->lf[slot].lmtline = lfs->reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot, + OTX2_CPT_LMT_LF_LMTLINEX(0)); + + lfs->lf[slot].ioreg = lfs->reg_base + + OTX2_CPT_RVU_FUNC_ADDR_S(lfs->blkaddr, slot, + OTX2_CPT_LF_NQX(0)); + } + /* Send request to attach LFs */ + ret = otx2_cpt_attach_rscrs_msg(lfs); + if (ret) + goto clear_lfs_num; + + ret = otx2_cpt_alloc_instruction_queues(lfs); + if (ret) { + dev_err(&lfs->pdev->dev, + "Allocating instruction queues failed\n"); + goto detach_rsrcs; + } + cptlf_hw_init(lfs); + /* + * Allow each LF to execute requests destined to any of 8 engine + * groups and set queue priority of each LF to high + */ + ret = cptlf_set_grp_and_pri(lfs, eng_grp_mask, pri); + if (ret) + goto free_iq; + + return 0; + +free_iq: + otx2_cpt_free_instruction_queues(lfs); + cptlf_hw_cleanup(lfs); +detach_rsrcs: + otx2_cpt_detach_rsrcs_msg(lfs); +clear_lfs_num: + lfs->lfs_num = 0; + return ret; +} +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_init, CRYPTO_DEV_OCTEONTX2_CPT); + +void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs) +{ + lfs->lfs_num = 0; + /* Cleanup LFs hardware side */ + cptlf_hw_cleanup(lfs); + /* Send request to detach LFs */ + otx2_cpt_detach_rsrcs_msg(lfs); +} +EXPORT_SYMBOL_NS_GPL(otx2_cptlf_shutdown, CRYPTO_DEV_OCTEONTX2_CPT); + +MODULE_AUTHOR("Marvell"); +MODULE_DESCRIPTION("Marvell RVU CPT Common module"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h new file mode 100644 index 0000000000..5302fe3d0e --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h @@ -0,0 +1,390 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ +#ifndef __OTX2_CPTLF_H +#define __OTX2_CPTLF_H + +#include <linux/soc/marvell/octeontx2/asm.h> +#include <mbox.h> +#include <rvu.h> +#include "otx2_cpt_common.h" +#include "otx2_cpt_reqmgr.h" + +/* + * CPT instruction and pending queues user requested length in CPT_INST_S msgs + */ +#define OTX2_CPT_USER_REQUESTED_QLEN_MSGS 8200 + +/* + * CPT instruction queue size passed to HW is in units of 40*CPT_INST_S + * messages. + */ +#define OTX2_CPT_SIZE_DIV40 (OTX2_CPT_USER_REQUESTED_QLEN_MSGS/40) + +/* + * CPT instruction and pending queues length in CPT_INST_S messages + */ +#define OTX2_CPT_INST_QLEN_MSGS ((OTX2_CPT_SIZE_DIV40 - 1) * 40) + +/* + * LDWB is getting incorrectly used when IQB_LDWB = 1 and CPT instruction + * queue has less than 320 free entries. So, increase HW instruction queue + * size by 320 and give 320 entries less for SW/NIX RX as a workaround. + */ +#define OTX2_CPT_INST_QLEN_EXTRA_BYTES (320 * OTX2_CPT_INST_SIZE) +#define OTX2_CPT_EXTRA_SIZE_DIV40 (320/40) + +/* CPT instruction queue length in bytes */ +#define OTX2_CPT_INST_QLEN_BYTES \ + ((OTX2_CPT_SIZE_DIV40 * 40 * OTX2_CPT_INST_SIZE) + \ + OTX2_CPT_INST_QLEN_EXTRA_BYTES) + +/* CPT instruction group queue length in bytes */ +#define OTX2_CPT_INST_GRP_QLEN_BYTES \ + ((OTX2_CPT_SIZE_DIV40 + OTX2_CPT_EXTRA_SIZE_DIV40) * 16) + +/* CPT FC length in bytes */ +#define OTX2_CPT_Q_FC_LEN 128 + +/* CPT instruction queue alignment */ +#define OTX2_CPT_INST_Q_ALIGNMENT 128 + +/* Mask which selects all engine groups */ +#define OTX2_CPT_ALL_ENG_GRPS_MASK 0xFF + +/* Maximum LFs supported in OcteonTX2 for CPT */ +#define OTX2_CPT_MAX_LFS_NUM 64 + +/* Queue priority */ +#define OTX2_CPT_QUEUE_HI_PRIO 0x1 +#define OTX2_CPT_QUEUE_LOW_PRIO 0x0 + +enum otx2_cptlf_state { + OTX2_CPTLF_IN_RESET, + OTX2_CPTLF_STARTED, +}; + +struct otx2_cpt_inst_queue { + u8 *vaddr; + u8 *real_vaddr; + dma_addr_t dma_addr; + dma_addr_t real_dma_addr; + u32 size; +}; + +struct otx2_cptlfs_info; +struct otx2_cptlf_wqe { + struct tasklet_struct work; + struct otx2_cptlfs_info *lfs; + u8 lf_num; +}; + +struct otx2_cptlf_info { + struct otx2_cptlfs_info *lfs; /* Ptr to cptlfs_info struct */ + void __iomem *lmtline; /* Address of LMTLINE */ + void __iomem *ioreg; /* LMTLINE send register */ + int msix_offset; /* MSI-X interrupts offset */ + cpumask_var_t affinity_mask; /* IRQs affinity mask */ + u8 irq_name[OTX2_CPT_LF_MSIX_VECTORS][32];/* Interrupts name */ + u8 is_irq_reg[OTX2_CPT_LF_MSIX_VECTORS]; /* Is interrupt registered */ + u8 slot; /* Slot number of this LF */ + + struct otx2_cpt_inst_queue iqueue;/* Instruction queue */ + struct otx2_cpt_pending_queue pqueue; /* Pending queue */ + struct otx2_cptlf_wqe *wqe; /* Tasklet work info */ +}; + +struct cpt_hw_ops { + void (*send_cmd)(union otx2_cpt_inst_s *cptinst, u32 insts_num, + struct otx2_cptlf_info *lf); + u8 (*cpt_get_compcode)(union otx2_cpt_res_s *result); + u8 (*cpt_get_uc_compcode)(union otx2_cpt_res_s *result); +}; + +struct otx2_cptlfs_info { + /* Registers start address of VF/PF LFs are attached to */ + void __iomem *reg_base; +#define LMTLINE_SIZE 128 + void __iomem *lmt_base; + struct pci_dev *pdev; /* Device LFs are attached to */ + struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM]; + struct otx2_mbox *mbox; + struct cpt_hw_ops *ops; + u8 are_lfs_attached; /* Whether CPT LFs are attached */ + u8 lfs_num; /* Number of CPT LFs */ + u8 kcrypto_eng_grp_num; /* Kernel crypto engine group number */ + u8 kvf_limits; /* Kernel crypto limits */ + atomic_t state; /* LF's state. started/reset */ + int blkaddr; /* CPT blkaddr: BLKADDR_CPT0/BLKADDR_CPT1 */ +}; + +static inline void otx2_cpt_free_instruction_queues( + struct otx2_cptlfs_info *lfs) +{ + struct otx2_cpt_inst_queue *iq; + int i; + + for (i = 0; i < lfs->lfs_num; i++) { + iq = &lfs->lf[i].iqueue; + if (iq->real_vaddr) + dma_free_coherent(&lfs->pdev->dev, + iq->size, + iq->real_vaddr, + iq->real_dma_addr); + iq->real_vaddr = NULL; + iq->vaddr = NULL; + } +} + +static inline int otx2_cpt_alloc_instruction_queues( + struct otx2_cptlfs_info *lfs) +{ + struct otx2_cpt_inst_queue *iq; + int ret = 0, i; + + if (!lfs->lfs_num) + return -EINVAL; + + for (i = 0; i < lfs->lfs_num; i++) { + iq = &lfs->lf[i].iqueue; + iq->size = OTX2_CPT_INST_QLEN_BYTES + + OTX2_CPT_Q_FC_LEN + + OTX2_CPT_INST_GRP_QLEN_BYTES + + OTX2_CPT_INST_Q_ALIGNMENT; + iq->real_vaddr = dma_alloc_coherent(&lfs->pdev->dev, iq->size, + &iq->real_dma_addr, GFP_KERNEL); + if (!iq->real_vaddr) { + ret = -ENOMEM; + goto error; + } + iq->vaddr = iq->real_vaddr + OTX2_CPT_INST_GRP_QLEN_BYTES; + iq->dma_addr = iq->real_dma_addr + OTX2_CPT_INST_GRP_QLEN_BYTES; + + /* Align pointers */ + iq->vaddr = PTR_ALIGN(iq->vaddr, OTX2_CPT_INST_Q_ALIGNMENT); + iq->dma_addr = PTR_ALIGN(iq->dma_addr, + OTX2_CPT_INST_Q_ALIGNMENT); + } + return 0; + +error: + otx2_cpt_free_instruction_queues(lfs); + return ret; +} + +static inline void otx2_cptlf_set_iqueues_base_addr( + struct otx2_cptlfs_info *lfs) +{ + union otx2_cptx_lf_q_base lf_q_base; + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + lf_q_base.u = lfs->lf[slot].iqueue.dma_addr; + otx2_cpt_write64(lfs->reg_base, lfs->blkaddr, slot, + OTX2_CPT_LF_Q_BASE, lf_q_base.u); + } +} + +static inline void otx2_cptlf_do_set_iqueue_size(struct otx2_cptlf_info *lf) +{ + union otx2_cptx_lf_q_size lf_q_size = { .u = 0x0 }; + + lf_q_size.s.size_div40 = OTX2_CPT_SIZE_DIV40 + + OTX2_CPT_EXTRA_SIZE_DIV40; + otx2_cpt_write64(lf->lfs->reg_base, lf->lfs->blkaddr, lf->slot, + OTX2_CPT_LF_Q_SIZE, lf_q_size.u); +} + +static inline void otx2_cptlf_set_iqueues_size(struct otx2_cptlfs_info *lfs) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cptlf_do_set_iqueue_size(&lfs->lf[slot]); +} + +static inline void otx2_cptlf_do_disable_iqueue(struct otx2_cptlf_info *lf) +{ + union otx2_cptx_lf_ctl lf_ctl = { .u = 0x0 }; + union otx2_cptx_lf_inprog lf_inprog; + u8 blkaddr = lf->lfs->blkaddr; + int timeout = 20; + + /* Disable instructions enqueuing */ + otx2_cpt_write64(lf->lfs->reg_base, blkaddr, lf->slot, + OTX2_CPT_LF_CTL, lf_ctl.u); + + /* Wait for instruction queue to become empty */ + do { + lf_inprog.u = otx2_cpt_read64(lf->lfs->reg_base, blkaddr, + lf->slot, OTX2_CPT_LF_INPROG); + if (!lf_inprog.s.inflight) + break; + + usleep_range(10000, 20000); + if (timeout-- < 0) { + dev_err(&lf->lfs->pdev->dev, + "Error LF %d is still busy.\n", lf->slot); + break; + } + + } while (1); + + /* + * Disable executions in the LF's queue, + * the queue should be empty at this point + */ + lf_inprog.s.eena = 0x0; + otx2_cpt_write64(lf->lfs->reg_base, blkaddr, lf->slot, + OTX2_CPT_LF_INPROG, lf_inprog.u); +} + +static inline void otx2_cptlf_disable_iqueues(struct otx2_cptlfs_info *lfs) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) + otx2_cptlf_do_disable_iqueue(&lfs->lf[slot]); +} + +static inline void otx2_cptlf_set_iqueue_enq(struct otx2_cptlf_info *lf, + bool enable) +{ + u8 blkaddr = lf->lfs->blkaddr; + union otx2_cptx_lf_ctl lf_ctl; + + lf_ctl.u = otx2_cpt_read64(lf->lfs->reg_base, blkaddr, lf->slot, + OTX2_CPT_LF_CTL); + + /* Set iqueue's enqueuing */ + lf_ctl.s.ena = enable ? 0x1 : 0x0; + otx2_cpt_write64(lf->lfs->reg_base, blkaddr, lf->slot, + OTX2_CPT_LF_CTL, lf_ctl.u); +} + +static inline void otx2_cptlf_enable_iqueue_enq(struct otx2_cptlf_info *lf) +{ + otx2_cptlf_set_iqueue_enq(lf, true); +} + +static inline void otx2_cptlf_set_iqueue_exec(struct otx2_cptlf_info *lf, + bool enable) +{ + union otx2_cptx_lf_inprog lf_inprog; + u8 blkaddr = lf->lfs->blkaddr; + + lf_inprog.u = otx2_cpt_read64(lf->lfs->reg_base, blkaddr, lf->slot, + OTX2_CPT_LF_INPROG); + + /* Set iqueue's execution */ + lf_inprog.s.eena = enable ? 0x1 : 0x0; + otx2_cpt_write64(lf->lfs->reg_base, blkaddr, lf->slot, + OTX2_CPT_LF_INPROG, lf_inprog.u); +} + +static inline void otx2_cptlf_enable_iqueue_exec(struct otx2_cptlf_info *lf) +{ + otx2_cptlf_set_iqueue_exec(lf, true); +} + +static inline void otx2_cptlf_disable_iqueue_exec(struct otx2_cptlf_info *lf) +{ + otx2_cptlf_set_iqueue_exec(lf, false); +} + +static inline void otx2_cptlf_enable_iqueues(struct otx2_cptlfs_info *lfs) +{ + int slot; + + for (slot = 0; slot < lfs->lfs_num; slot++) { + otx2_cptlf_enable_iqueue_exec(&lfs->lf[slot]); + otx2_cptlf_enable_iqueue_enq(&lfs->lf[slot]); + } +} + +static inline void otx2_cpt_fill_inst(union otx2_cpt_inst_s *cptinst, + struct otx2_cpt_iq_command *iq_cmd, + u64 comp_baddr) +{ + cptinst->u[0] = 0x0; + cptinst->s.doneint = true; + cptinst->s.res_addr = comp_baddr; + cptinst->u[2] = 0x0; + cptinst->u[3] = 0x0; + cptinst->s.ei0 = iq_cmd->cmd.u; + cptinst->s.ei1 = iq_cmd->dptr; + cptinst->s.ei2 = iq_cmd->rptr; + cptinst->s.ei3 = iq_cmd->cptr.u; +} + +/* + * On OcteonTX2 platform the parameter insts_num is used as a count of + * instructions to be enqueued. The valid values for insts_num are: + * 1 - 1 CPT instruction will be enqueued during LMTST operation + * 2 - 2 CPT instructions will be enqueued during LMTST operation + */ +static inline void otx2_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, + u32 insts_num, struct otx2_cptlf_info *lf) +{ + void __iomem *lmtline = lf->lmtline; + long ret; + + /* + * Make sure memory areas pointed in CPT_INST_S + * are flushed before the instruction is sent to CPT + */ + dma_wmb(); + + do { + /* Copy CPT command to LMTLINE */ + memcpy_toio(lmtline, cptinst, insts_num * OTX2_CPT_INST_SIZE); + + /* + * LDEOR initiates atomic transfer to I/O device + * The following will cause the LMTST to fail (the LDEOR + * returns zero): + * - No stores have been performed to the LMTLINE since it was + * last invalidated. + * - The bytes which have been stored to LMTLINE since it was + * last invalidated form a pattern that is non-contiguous, does + * not start at byte 0, or does not end on a 8-byte boundary. + * (i.e.comprises a formation of other than 1–16 8-byte + * words.) + * + * These rules are designed such that an operating system + * context switch or hypervisor guest switch need have no + * knowledge of the LMTST operations; the switch code does not + * need to store to LMTCANCEL. Also note as LMTLINE data cannot + * be read, there is no information leakage between processes. + */ + ret = otx2_lmt_flush(lf->ioreg); + + } while (!ret); +} + +static inline bool otx2_cptlf_started(struct otx2_cptlfs_info *lfs) +{ + return atomic_read(&lfs->state) == OTX2_CPTLF_STARTED; +} + +static inline void otx2_cptlf_set_dev_info(struct otx2_cptlfs_info *lfs, + struct pci_dev *pdev, + void __iomem *reg_base, + struct otx2_mbox *mbox, + int blkaddr) +{ + lfs->pdev = pdev; + lfs->reg_base = reg_base; + lfs->mbox = mbox; + lfs->blkaddr = blkaddr; +} + +int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_msk, int pri, + int lfs_num); +void otx2_cptlf_shutdown(struct otx2_cptlfs_info *lfs); +int otx2_cptlf_register_interrupts(struct otx2_cptlfs_info *lfs); +void otx2_cptlf_unregister_interrupts(struct otx2_cptlfs_info *lfs); +void otx2_cptlf_free_irqs_affinity(struct otx2_cptlfs_info *lfs); +int otx2_cptlf_set_irqs_affinity(struct otx2_cptlfs_info *lfs); + +#endif /* __OTX2_CPTLF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h new file mode 100644 index 0000000000..a209ec5af3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPTPF_H +#define __OTX2_CPTPF_H + +#include "otx2_cpt_common.h" +#include "otx2_cptpf_ucode.h" +#include "otx2_cptlf.h" + +struct otx2_cptpf_dev; +struct otx2_cptvf_info { + struct otx2_cptpf_dev *cptpf; /* PF pointer this VF belongs to */ + struct work_struct vfpf_mbox_work; + struct pci_dev *vf_dev; + int vf_id; + int intr_idx; +}; + +struct cptpf_flr_work { + struct work_struct work; + struct otx2_cptpf_dev *pf; +}; + +struct otx2_cptpf_dev { + void __iomem *reg_base; /* CPT PF registers start address */ + void __iomem *afpf_mbox_base; /* PF-AF mbox start address */ + void __iomem *vfpf_mbox_base; /* VF-PF mbox start address */ + struct pci_dev *pdev; /* PCI device handle */ + struct otx2_cptvf_info vf[OTX2_CPT_MAX_VFS_NUM]; + struct otx2_cpt_eng_grps eng_grps;/* Engine groups information */ + struct otx2_cptlfs_info lfs; /* CPT LFs attached to this PF */ + struct otx2_cptlfs_info cpt1_lfs; /* CPT1 LFs attached to this PF */ + /* HW capabilities for each engine type */ + union otx2_cpt_eng_caps eng_caps[OTX2_CPT_MAX_ENG_TYPES]; + bool is_eng_caps_discovered; + + /* AF <=> PF mbox */ + struct otx2_mbox afpf_mbox; + struct work_struct afpf_mbox_work; + struct workqueue_struct *afpf_mbox_wq; + + struct otx2_mbox afpf_mbox_up; + struct work_struct afpf_mbox_up_work; + + /* VF <=> PF mbox */ + struct otx2_mbox vfpf_mbox; + struct workqueue_struct *vfpf_mbox_wq; + + struct workqueue_struct *flr_wq; + struct cptpf_flr_work *flr_work; + struct mutex lock; /* serialize mailbox access */ + + unsigned long cap_flag; + u8 pf_id; /* RVU PF number */ + u8 max_vfs; /* Maximum number of VFs supported by CPT */ + u8 enabled_vfs; /* Number of enabled VFs */ + u8 sso_pf_func_ovrd; /* SSO PF_FUNC override bit */ + u8 kvf_limits; /* Kernel crypto limits */ + bool has_cpt1; + u8 rsrc_req_blkaddr; + + /* Devlink */ + struct devlink *dl; +}; + +irqreturn_t otx2_cptpf_afpf_mbox_intr(int irq, void *arg); +void otx2_cptpf_afpf_mbox_handler(struct work_struct *work); +void otx2_cptpf_afpf_mbox_up_handler(struct work_struct *work); +irqreturn_t otx2_cptpf_vfpf_mbox_intr(int irq, void *arg); +void otx2_cptpf_vfpf_mbox_handler(struct work_struct *work); + +#endif /* __OTX2_CPTPF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c new file mode 100644 index 0000000000..e34223daa3 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c @@ -0,0 +1,880 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include <linux/firmware.h> +#include "otx2_cpt_hw_types.h" +#include "otx2_cpt_common.h" +#include "otx2_cpt_devlink.h" +#include "otx2_cptpf_ucode.h" +#include "otx2_cptpf.h" +#include "cn10k_cpt.h" +#include "rvu_reg.h" + +#define OTX2_CPT_DRV_NAME "rvu_cptpf" +#define OTX2_CPT_DRV_STRING "Marvell RVU CPT Physical Function Driver" + +#define CPT_UC_RID_CN9K_B0 1 + +static void cptpf_enable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + int ena_bits; + + /* Clear any pending interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(0), ~0x0ULL); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(1), ~0x0ULL); + + /* Enable VF interrupts for VFs from 0 to 63 */ + ena_bits = ((num_vfs - 1) % 64); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INT_ENA_W1SX(0), + GENMASK_ULL(ena_bits, 0)); + + if (num_vfs > 64) { + /* Enable VF interrupts for VFs from 64 to 127 */ + ena_bits = num_vfs - 64 - 1; + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INT_ENA_W1SX(1), + GENMASK_ULL(ena_bits, 0)); + } +} + +static void cptpf_disable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + int vector; + + /* Disable VF-PF interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INT_ENA_W1CX(0), ~0ULL); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INT_ENA_W1CX(1), ~0ULL); + /* Clear any pending interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(0), ~0ULL); + + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFPF_MBOX0); + free_irq(vector, cptpf); + + if (num_vfs > 64) { + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(1), ~0ULL); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFPF_MBOX1); + free_irq(vector, cptpf); + } +} + +static void cptpf_enable_vf_flr_me_intrs(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + /* Clear FLR interrupt if any */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0), + INTR_MASK(num_vfs)); + + /* Enable VF FLR interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1SX(0), INTR_MASK(num_vfs)); + /* Clear ME interrupt if any */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFME_INTX(0), + INTR_MASK(num_vfs)); + /* Enable VF ME interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INT_ENA_W1SX(0), INTR_MASK(num_vfs)); + + if (num_vfs <= 64) + return; + + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1), + INTR_MASK(num_vfs - 64)); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1SX(1), INTR_MASK(num_vfs - 64)); + + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFME_INTX(1), + INTR_MASK(num_vfs - 64)); + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INT_ENA_W1SX(1), INTR_MASK(num_vfs - 64)); +} + +static void cptpf_disable_vf_flr_me_intrs(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + int vector; + + /* Disable VF FLR interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1CX(0), INTR_MASK(num_vfs)); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR0); + free_irq(vector, cptpf); + + /* Disable VF ME interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INT_ENA_W1CX(0), INTR_MASK(num_vfs)); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFME0); + free_irq(vector, cptpf); + + if (num_vfs <= 64) + return; + + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1CX(1), INTR_MASK(num_vfs - 64)); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR1); + free_irq(vector, cptpf); + + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INT_ENA_W1CX(1), INTR_MASK(num_vfs - 64)); + vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFME1); + free_irq(vector, cptpf); +} + +static void cptpf_flr_wq_handler(struct work_struct *work) +{ + struct cptpf_flr_work *flr_work; + struct otx2_cptpf_dev *pf; + struct mbox_msghdr *req; + struct otx2_mbox *mbox; + int vf, reg = 0; + + flr_work = container_of(work, struct cptpf_flr_work, work); + pf = flr_work->pf; + mbox = &pf->afpf_mbox; + + vf = flr_work - pf->flr_work; + + mutex_lock(&pf->lock); + req = otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct msg_rsp)); + if (!req) { + mutex_unlock(&pf->lock); + return; + } + + req->sig = OTX2_MBOX_REQ_SIG; + req->id = MBOX_MSG_VF_FLR; + req->pcifunc &= RVU_PFVF_FUNC_MASK; + req->pcifunc |= (vf + 1) & RVU_PFVF_FUNC_MASK; + + otx2_cpt_send_mbox_msg(mbox, pf->pdev); + if (!otx2_cpt_sync_mbox_msg(&pf->afpf_mbox)) { + + if (vf >= 64) { + reg = 1; + vf = vf - 64; + } + /* Clear transaction pending register */ + otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFTRPENDX(reg), BIT_ULL(vf)); + otx2_cpt_write64(pf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1SX(reg), BIT_ULL(vf)); + } + mutex_unlock(&pf->lock); +} + +static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg) +{ + int reg, dev, vf, start_vf, num_reg = 1; + struct otx2_cptpf_dev *cptpf = arg; + u64 intr; + + if (cptpf->max_vfs > 64) + num_reg = 2; + + for (reg = 0; reg < num_reg; reg++) { + intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INTX(reg)); + if (!intr) + continue; + start_vf = 64 * reg; + for (vf = 0; vf < 64; vf++) { + if (!(intr & BIT_ULL(vf))) + continue; + dev = vf + start_vf; + queue_work(cptpf->flr_wq, &cptpf->flr_work[dev].work); + /* Clear interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INTX(reg), BIT_ULL(vf)); + /* Disable the interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFFLR_INT_ENA_W1CX(reg), + BIT_ULL(vf)); + } + } + return IRQ_HANDLED; +} + +static irqreturn_t cptpf_vf_me_intr(int __always_unused irq, void *arg) +{ + struct otx2_cptpf_dev *cptpf = arg; + int reg, vf, num_reg = 1; + u64 intr; + + if (cptpf->max_vfs > 64) + num_reg = 2; + + for (reg = 0; reg < num_reg; reg++) { + intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INTX(reg)); + if (!intr) + continue; + for (vf = 0; vf < 64; vf++) { + if (!(intr & BIT_ULL(vf))) + continue; + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFTRPENDX(reg), BIT_ULL(vf)); + /* Clear interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFME_INTX(reg), BIT_ULL(vf)); + } + } + return IRQ_HANDLED; +} + +static void cptpf_unregister_vfpf_intr(struct otx2_cptpf_dev *cptpf, + int num_vfs) +{ + cptpf_disable_vfpf_mbox_intr(cptpf, num_vfs); + cptpf_disable_vf_flr_me_intrs(cptpf, num_vfs); +} + +static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs) +{ + struct pci_dev *pdev = cptpf->pdev; + struct device *dev = &pdev->dev; + int ret, vector; + + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX0); + /* Register VF-PF mailbox interrupt handler */ + ret = request_irq(vector, otx2_cptpf_vfpf_mbox_intr, 0, "CPTVFPF Mbox0", + cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for PFVF mbox0 irq\n"); + return ret; + } + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR0); + /* Register VF FLR interrupt handler */ + ret = request_irq(vector, cptpf_vf_flr_intr, 0, "CPTPF FLR0", cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for VFFLR0 irq\n"); + goto free_mbox0_irq; + } + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME0); + /* Register VF ME interrupt handler */ + ret = request_irq(vector, cptpf_vf_me_intr, 0, "CPTPF ME0", cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for PFVF mbox0 irq\n"); + goto free_flr0_irq; + } + + if (num_vfs > 64) { + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1); + ret = request_irq(vector, otx2_cptpf_vfpf_mbox_intr, 0, + "CPTVFPF Mbox1", cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for PFVF mbox1 irq\n"); + goto free_me0_irq; + } + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1); + /* Register VF FLR interrupt handler */ + ret = request_irq(vector, cptpf_vf_flr_intr, 0, "CPTPF FLR1", + cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for VFFLR1 irq\n"); + goto free_mbox1_irq; + } + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME1); + /* Register VF FLR interrupt handler */ + ret = request_irq(vector, cptpf_vf_me_intr, 0, "CPTPF ME1", + cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for VFFLR1 irq\n"); + goto free_flr1_irq; + } + } + cptpf_enable_vfpf_mbox_intr(cptpf, num_vfs); + cptpf_enable_vf_flr_me_intrs(cptpf, num_vfs); + + return 0; + +free_flr1_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1); + free_irq(vector, cptpf); +free_mbox1_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1); + free_irq(vector, cptpf); +free_me0_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME0); + free_irq(vector, cptpf); +free_flr0_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR0); + free_irq(vector, cptpf); +free_mbox0_irq: + vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX0); + free_irq(vector, cptpf); + return ret; +} + +static void cptpf_flr_wq_destroy(struct otx2_cptpf_dev *pf) +{ + if (!pf->flr_wq) + return; + destroy_workqueue(pf->flr_wq); + pf->flr_wq = NULL; + kfree(pf->flr_work); +} + +static int cptpf_flr_wq_init(struct otx2_cptpf_dev *cptpf, int num_vfs) +{ + int vf; + + cptpf->flr_wq = alloc_ordered_workqueue("cptpf_flr_wq", 0); + if (!cptpf->flr_wq) + return -ENOMEM; + + cptpf->flr_work = kcalloc(num_vfs, sizeof(struct cptpf_flr_work), + GFP_KERNEL); + if (!cptpf->flr_work) + goto destroy_wq; + + for (vf = 0; vf < num_vfs; vf++) { + cptpf->flr_work[vf].pf = cptpf; + INIT_WORK(&cptpf->flr_work[vf].work, cptpf_flr_wq_handler); + } + return 0; + +destroy_wq: + destroy_workqueue(cptpf->flr_wq); + return -ENOMEM; +} + +static int cptpf_vfpf_mbox_init(struct otx2_cptpf_dev *cptpf, int num_vfs) +{ + struct device *dev = &cptpf->pdev->dev; + u64 vfpf_mbox_base; + int err, i; + + cptpf->vfpf_mbox_wq = + alloc_ordered_workqueue("cpt_vfpf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); + if (!cptpf->vfpf_mbox_wq) + return -ENOMEM; + + /* Map VF-PF mailbox memory */ + if (test_bit(CN10K_MBOX, &cptpf->cap_flag)) + vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_MBOX_ADDR); + else + vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR); + + if (!vfpf_mbox_base) { + dev_err(dev, "VF-PF mailbox address not configured\n"); + err = -ENOMEM; + goto free_wqe; + } + cptpf->vfpf_mbox_base = devm_ioremap_wc(dev, vfpf_mbox_base, + MBOX_SIZE * cptpf->max_vfs); + if (!cptpf->vfpf_mbox_base) { + dev_err(dev, "Mapping of VF-PF mailbox address failed\n"); + err = -ENOMEM; + goto free_wqe; + } + err = otx2_mbox_init(&cptpf->vfpf_mbox, cptpf->vfpf_mbox_base, + cptpf->pdev, cptpf->reg_base, MBOX_DIR_PFVF, + num_vfs); + if (err) + goto free_wqe; + + for (i = 0; i < num_vfs; i++) { + cptpf->vf[i].vf_id = i; + cptpf->vf[i].cptpf = cptpf; + cptpf->vf[i].intr_idx = i % 64; + INIT_WORK(&cptpf->vf[i].vfpf_mbox_work, + otx2_cptpf_vfpf_mbox_handler); + } + return 0; + +free_wqe: + destroy_workqueue(cptpf->vfpf_mbox_wq); + return err; +} + +static void cptpf_vfpf_mbox_destroy(struct otx2_cptpf_dev *cptpf) +{ + destroy_workqueue(cptpf->vfpf_mbox_wq); + otx2_mbox_destroy(&cptpf->vfpf_mbox); +} + +static void cptpf_disable_afpf_mbox_intr(struct otx2_cptpf_dev *cptpf) +{ + /* Disable AF-PF interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT_ENA_W1C, + 0x1ULL); + /* Clear interrupt if any */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT, 0x1ULL); +} + +static int cptpf_register_afpf_mbox_intr(struct otx2_cptpf_dev *cptpf) +{ + struct pci_dev *pdev = cptpf->pdev; + struct device *dev = &pdev->dev; + int ret, irq; + + irq = pci_irq_vector(pdev, RVU_PF_INT_VEC_AFPF_MBOX); + /* Register AF-PF mailbox interrupt handler */ + ret = devm_request_irq(dev, irq, otx2_cptpf_afpf_mbox_intr, 0, + "CPTAFPF Mbox", cptpf); + if (ret) { + dev_err(dev, + "IRQ registration failed for PFAF mbox irq\n"); + return ret; + } + /* Clear interrupt if any, to avoid spurious interrupts */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT, 0x1ULL); + /* Enable AF-PF interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT_ENA_W1S, + 0x1ULL); + + ret = otx2_cpt_send_ready_msg(&cptpf->afpf_mbox, cptpf->pdev); + if (ret) { + dev_warn(dev, + "AF not responding to mailbox, deferring probe\n"); + cptpf_disable_afpf_mbox_intr(cptpf); + return -EPROBE_DEFER; + } + return 0; +} + +static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf) +{ + struct pci_dev *pdev = cptpf->pdev; + resource_size_t offset; + int err; + + cptpf->afpf_mbox_wq = + alloc_ordered_workqueue("cpt_afpf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); + if (!cptpf->afpf_mbox_wq) + return -ENOMEM; + + offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); + /* Map AF-PF mailbox memory */ + cptpf->afpf_mbox_base = devm_ioremap_wc(&pdev->dev, offset, MBOX_SIZE); + if (!cptpf->afpf_mbox_base) { + dev_err(&pdev->dev, "Unable to map BAR4\n"); + err = -ENOMEM; + goto error; + } + + err = otx2_mbox_init(&cptpf->afpf_mbox, cptpf->afpf_mbox_base, + pdev, cptpf->reg_base, MBOX_DIR_PFAF, 1); + if (err) + goto error; + + err = otx2_mbox_init(&cptpf->afpf_mbox_up, cptpf->afpf_mbox_base, + pdev, cptpf->reg_base, MBOX_DIR_PFAF_UP, 1); + if (err) + goto mbox_cleanup; + + INIT_WORK(&cptpf->afpf_mbox_work, otx2_cptpf_afpf_mbox_handler); + INIT_WORK(&cptpf->afpf_mbox_up_work, otx2_cptpf_afpf_mbox_up_handler); + mutex_init(&cptpf->lock); + + return 0; + +mbox_cleanup: + otx2_mbox_destroy(&cptpf->afpf_mbox); +error: + destroy_workqueue(cptpf->afpf_mbox_wq); + return err; +} + +static void cptpf_afpf_mbox_destroy(struct otx2_cptpf_dev *cptpf) +{ + destroy_workqueue(cptpf->afpf_mbox_wq); + otx2_mbox_destroy(&cptpf->afpf_mbox); + otx2_mbox_destroy(&cptpf->afpf_mbox_up); +} + +static ssize_t sso_pf_func_ovrd_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", cptpf->sso_pf_func_ovrd); +} + +static ssize_t sso_pf_func_ovrd_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev); + u8 sso_pf_func_ovrd; + + if (!(cptpf->pdev->revision == CPT_UC_RID_CN9K_B0)) + return count; + + if (kstrtou8(buf, 0, &sso_pf_func_ovrd)) + return -EINVAL; + + cptpf->sso_pf_func_ovrd = sso_pf_func_ovrd; + + return count; +} + +static ssize_t kvf_limits_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev); + + return sprintf(buf, "%d\n", cptpf->kvf_limits); +} + +static ssize_t kvf_limits_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev); + int lfs_num; + int ret; + + ret = kstrtoint(buf, 0, &lfs_num); + if (ret) + return ret; + if (lfs_num < 1 || lfs_num > num_online_cpus()) { + dev_err(dev, "lfs count %d must be in range [1 - %d]\n", + lfs_num, num_online_cpus()); + return -EINVAL; + } + cptpf->kvf_limits = lfs_num; + + return count; +} + +static DEVICE_ATTR_RW(kvf_limits); +static DEVICE_ATTR_RW(sso_pf_func_ovrd); + +static struct attribute *cptpf_attrs[] = { + &dev_attr_kvf_limits.attr, + &dev_attr_sso_pf_func_ovrd.attr, + NULL +}; + +static const struct attribute_group cptpf_sysfs_group = { + .attrs = cptpf_attrs, +}; + +static int cpt_is_pf_usable(struct otx2_cptpf_dev *cptpf) +{ + u64 rev; + + rev = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_BLOCK_ADDRX_DISC(BLKADDR_RVUM)); + rev = (rev >> 12) & 0xFF; + /* + * Check if AF has setup revision for RVUM block, otherwise + * driver probe should be deferred until AF driver comes up + */ + if (!rev) { + dev_warn(&cptpf->pdev->dev, + "AF is not initialized, deferring probe\n"); + return -EPROBE_DEFER; + } + return 0; +} + +static int cptx_device_reset(struct otx2_cptpf_dev *cptpf, int blkaddr) +{ + int timeout = 10, ret; + u64 reg = 0; + + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_BLK_RST, 0x1, blkaddr); + if (ret) + return ret; + + do { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_BLK_RST, ®, blkaddr); + if (ret) + return ret; + + if (!((reg >> 63) & 0x1)) + break; + + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + } while (1); + + return ret; +} + +static int cptpf_device_reset(struct otx2_cptpf_dev *cptpf) +{ + int ret = 0; + + if (cptpf->has_cpt1) { + ret = cptx_device_reset(cptpf, BLKADDR_CPT1); + if (ret) + return ret; + } + return cptx_device_reset(cptpf, BLKADDR_CPT0); +} + +static void cptpf_check_block_implemented(struct otx2_cptpf_dev *cptpf) +{ + u64 cfg; + + cfg = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_BLOCK_ADDRX_DISC(BLKADDR_CPT1)); + if (cfg & BIT_ULL(11)) + cptpf->has_cpt1 = true; +} + +static int cptpf_device_init(struct otx2_cptpf_dev *cptpf) +{ + union otx2_cptx_af_constants1 af_cnsts1 = {0}; + int ret = 0; + + /* check if 'implemented' bit is set for block BLKADDR_CPT1 */ + cptpf_check_block_implemented(cptpf); + /* Reset the CPT PF device */ + ret = cptpf_device_reset(cptpf); + if (ret) + return ret; + + /* Get number of SE, IE and AE engines */ + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_CONSTANTS1, &af_cnsts1.u, + BLKADDR_CPT0); + if (ret) + return ret; + + cptpf->eng_grps.avail.max_se_cnt = af_cnsts1.s.se; + cptpf->eng_grps.avail.max_ie_cnt = af_cnsts1.s.ie; + cptpf->eng_grps.avail.max_ae_cnt = af_cnsts1.s.ae; + + /* Disable all cores */ + ret = otx2_cpt_disable_all_cores(cptpf); + + return ret; +} + +static int cptpf_sriov_disable(struct pci_dev *pdev) +{ + struct otx2_cptpf_dev *cptpf = pci_get_drvdata(pdev); + int num_vfs = pci_num_vf(pdev); + + if (!num_vfs) + return 0; + + pci_disable_sriov(pdev); + cptpf_unregister_vfpf_intr(cptpf, num_vfs); + cptpf_flr_wq_destroy(cptpf); + cptpf_vfpf_mbox_destroy(cptpf); + module_put(THIS_MODULE); + cptpf->enabled_vfs = 0; + + return 0; +} + +static int cptpf_sriov_enable(struct pci_dev *pdev, int num_vfs) +{ + struct otx2_cptpf_dev *cptpf = pci_get_drvdata(pdev); + int ret; + + /* Initialize VF<=>PF mailbox */ + ret = cptpf_vfpf_mbox_init(cptpf, num_vfs); + if (ret) + return ret; + + ret = cptpf_flr_wq_init(cptpf, num_vfs); + if (ret) + goto destroy_mbox; + /* Register VF<=>PF mailbox interrupt */ + ret = cptpf_register_vfpf_intr(cptpf, num_vfs); + if (ret) + goto destroy_flr; + + /* Get CPT HW capabilities using LOAD_FVC operation. */ + ret = otx2_cpt_discover_eng_capabilities(cptpf); + if (ret) + goto disable_intr; + + ret = otx2_cpt_create_eng_grps(cptpf, &cptpf->eng_grps); + if (ret) + goto disable_intr; + + cptpf->enabled_vfs = num_vfs; + ret = pci_enable_sriov(pdev, num_vfs); + if (ret) + goto disable_intr; + + dev_notice(&cptpf->pdev->dev, "VFs enabled: %d\n", num_vfs); + + try_module_get(THIS_MODULE); + return num_vfs; + +disable_intr: + cptpf_unregister_vfpf_intr(cptpf, num_vfs); + cptpf->enabled_vfs = 0; +destroy_flr: + cptpf_flr_wq_destroy(cptpf); +destroy_mbox: + cptpf_vfpf_mbox_destroy(cptpf); + return ret; +} + +static int otx2_cptpf_sriov_configure(struct pci_dev *pdev, int num_vfs) +{ + if (num_vfs > 0) { + return cptpf_sriov_enable(pdev, num_vfs); + } else { + return cptpf_sriov_disable(pdev); + } +} + +static int otx2_cptpf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct otx2_cptpf_dev *cptpf; + int err; + + cptpf = devm_kzalloc(dev, sizeof(*cptpf), GFP_KERNEL); + if (!cptpf) + return -ENOMEM; + + err = pcim_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + + err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto clear_drvdata; + } + /* Map PF's configuration registers */ + err = pcim_iomap_regions_request_all(pdev, 1 << PCI_PF_REG_BAR_NUM, + OTX2_CPT_DRV_NAME); + if (err) { + dev_err(dev, "Couldn't get PCI resources 0x%x\n", err); + goto clear_drvdata; + } + pci_set_master(pdev); + pci_set_drvdata(pdev, cptpf); + cptpf->pdev = pdev; + + cptpf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM]; + + /* Check if AF driver is up, otherwise defer probe */ + err = cpt_is_pf_usable(cptpf); + if (err) + goto clear_drvdata; + + err = pci_alloc_irq_vectors(pdev, RVU_PF_INT_VEC_CNT, + RVU_PF_INT_VEC_CNT, PCI_IRQ_MSIX); + if (err < 0) { + dev_err(dev, "Request for %d msix vectors failed\n", + RVU_PF_INT_VEC_CNT); + goto clear_drvdata; + } + otx2_cpt_set_hw_caps(pdev, &cptpf->cap_flag); + /* Initialize AF-PF mailbox */ + err = cptpf_afpf_mbox_init(cptpf); + if (err) + goto clear_drvdata; + /* Register mailbox interrupt */ + err = cptpf_register_afpf_mbox_intr(cptpf); + if (err) + goto destroy_afpf_mbox; + + cptpf->max_vfs = pci_sriov_get_totalvfs(pdev); + + err = cn10k_cptpf_lmtst_init(cptpf); + if (err) + goto unregister_intr; + + /* Initialize CPT PF device */ + err = cptpf_device_init(cptpf); + if (err) + goto unregister_intr; + + /* Initialize engine groups */ + err = otx2_cpt_init_eng_grps(pdev, &cptpf->eng_grps); + if (err) + goto unregister_intr; + + err = sysfs_create_group(&dev->kobj, &cptpf_sysfs_group); + if (err) + goto cleanup_eng_grps; + + err = otx2_cpt_register_dl(cptpf); + if (err) + goto sysfs_grp_del; + + return 0; + +sysfs_grp_del: + sysfs_remove_group(&dev->kobj, &cptpf_sysfs_group); +cleanup_eng_grps: + otx2_cpt_cleanup_eng_grps(pdev, &cptpf->eng_grps); +unregister_intr: + cptpf_disable_afpf_mbox_intr(cptpf); +destroy_afpf_mbox: + cptpf_afpf_mbox_destroy(cptpf); +clear_drvdata: + pci_set_drvdata(pdev, NULL); + return err; +} + +static void otx2_cptpf_remove(struct pci_dev *pdev) +{ + struct otx2_cptpf_dev *cptpf = pci_get_drvdata(pdev); + + if (!cptpf) + return; + + cptpf_sriov_disable(pdev); + otx2_cpt_unregister_dl(cptpf); + /* Delete sysfs entry created for kernel VF limits */ + sysfs_remove_group(&pdev->dev.kobj, &cptpf_sysfs_group); + /* Cleanup engine groups */ + otx2_cpt_cleanup_eng_grps(pdev, &cptpf->eng_grps); + /* Disable AF-PF mailbox interrupt */ + cptpf_disable_afpf_mbox_intr(cptpf); + /* Destroy AF-PF mbox */ + cptpf_afpf_mbox_destroy(cptpf); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx2_cpt_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX2_CPT_PCI_PF_DEVICE_ID) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CN10K_CPT_PCI_PF_DEVICE_ID) }, + { 0, } /* end of table */ +}; + +static struct pci_driver otx2_cpt_pci_driver = { + .name = OTX2_CPT_DRV_NAME, + .id_table = otx2_cpt_id_table, + .probe = otx2_cptpf_probe, + .remove = otx2_cptpf_remove, + .sriov_configure = otx2_cptpf_sriov_configure +}; + +module_pci_driver(otx2_cpt_pci_driver); + +MODULE_IMPORT_NS(CRYPTO_DEV_OCTEONTX2_CPT); + +MODULE_AUTHOR("Marvell"); +MODULE_DESCRIPTION(OTX2_CPT_DRV_STRING); +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(pci, otx2_cpt_id_table); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c new file mode 100644 index 0000000000..480b3720f1 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_mbox.c @@ -0,0 +1,608 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptpf.h" +#include "rvu_reg.h" + +/* Fastpath ipsec opcode with inplace processing */ +#define CPT_INLINE_RX_OPCODE (0x26 | (1 << 6)) +#define CN10K_CPT_INLINE_RX_OPCODE (0x29 | (1 << 6)) + +#define cpt_inline_rx_opcode(pdev) \ +({ \ + u8 opcode; \ + if (is_dev_otx2(pdev)) \ + opcode = CPT_INLINE_RX_OPCODE; \ + else \ + opcode = CN10K_CPT_INLINE_RX_OPCODE; \ + (opcode); \ +}) + +/* + * CPT PF driver version, It will be incremented by 1 for every feature + * addition in CPT mailbox messages. + */ +#define OTX2_CPT_PF_DRV_VERSION 0x1 + +static int forward_to_af(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req, int size) +{ + struct mbox_msghdr *msg; + int ret; + + mutex_lock(&cptpf->lock); + msg = otx2_mbox_alloc_msg(&cptpf->afpf_mbox, 0, size); + if (msg == NULL) { + mutex_unlock(&cptpf->lock); + return -ENOMEM; + } + + memcpy((uint8_t *)msg + sizeof(struct mbox_msghdr), + (uint8_t *)req + sizeof(struct mbox_msghdr), size); + msg->id = req->id; + msg->pcifunc = req->pcifunc; + msg->sig = req->sig; + msg->ver = req->ver; + + ret = otx2_cpt_sync_mbox_msg(&cptpf->afpf_mbox); + /* Error code -EIO indicate there is a communication failure + * to the AF. Rest of the error codes indicate that AF processed + * VF messages and set the error codes in response messages + * (if any) so simply forward responses to VF. + */ + if (ret == -EIO) { + dev_warn(&cptpf->pdev->dev, + "AF not responding to VF%d messages\n", vf->vf_id); + mutex_unlock(&cptpf->lock); + return ret; + } + mutex_unlock(&cptpf->lock); + return 0; +} + +static int handle_msg_get_caps(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req) +{ + struct otx2_cpt_caps_rsp *rsp; + + rsp = (struct otx2_cpt_caps_rsp *) + otx2_mbox_alloc_msg(&cptpf->vfpf_mbox, vf->vf_id, + sizeof(*rsp)); + if (!rsp) + return -ENOMEM; + + rsp->hdr.id = MBOX_MSG_GET_CAPS; + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; + rsp->hdr.pcifunc = req->pcifunc; + rsp->cpt_pf_drv_version = OTX2_CPT_PF_DRV_VERSION; + rsp->cpt_revision = cptpf->pdev->revision; + memcpy(&rsp->eng_caps, &cptpf->eng_caps, sizeof(rsp->eng_caps)); + + return 0; +} + +static int handle_msg_get_eng_grp_num(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req) +{ + struct otx2_cpt_egrp_num_msg *grp_req; + struct otx2_cpt_egrp_num_rsp *rsp; + + grp_req = (struct otx2_cpt_egrp_num_msg *)req; + rsp = (struct otx2_cpt_egrp_num_rsp *) + otx2_mbox_alloc_msg(&cptpf->vfpf_mbox, vf->vf_id, sizeof(*rsp)); + if (!rsp) + return -ENOMEM; + + rsp->hdr.id = MBOX_MSG_GET_ENG_GRP_NUM; + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; + rsp->hdr.pcifunc = req->pcifunc; + rsp->eng_type = grp_req->eng_type; + rsp->eng_grp_num = otx2_cpt_get_eng_grp(&cptpf->eng_grps, + grp_req->eng_type); + + return 0; +} + +static int handle_msg_kvf_limits(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req) +{ + struct otx2_cpt_kvf_limits_rsp *rsp; + + rsp = (struct otx2_cpt_kvf_limits_rsp *) + otx2_mbox_alloc_msg(&cptpf->vfpf_mbox, vf->vf_id, sizeof(*rsp)); + if (!rsp) + return -ENOMEM; + + rsp->hdr.id = MBOX_MSG_GET_KVF_LIMITS; + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; + rsp->hdr.pcifunc = req->pcifunc; + rsp->kvf_limits = cptpf->kvf_limits; + + return 0; +} + +static int send_inline_ipsec_inbound_msg(struct otx2_cptpf_dev *cptpf, + int sso_pf_func, u8 slot) +{ + struct cpt_inline_ipsec_cfg_msg *req; + struct pci_dev *pdev = cptpf->pdev; + + req = (struct cpt_inline_ipsec_cfg_msg *) + otx2_mbox_alloc_msg_rsp(&cptpf->afpf_mbox, 0, + sizeof(*req), sizeof(struct msg_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + memset(req, 0, sizeof(*req)); + req->hdr.id = MBOX_MSG_CPT_INLINE_IPSEC_CFG; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = OTX2_CPT_RVU_PFFUNC(cptpf->pf_id, 0); + req->dir = CPT_INLINE_INBOUND; + req->slot = slot; + req->sso_pf_func_ovrd = cptpf->sso_pf_func_ovrd; + req->sso_pf_func = sso_pf_func; + req->enable = 1; + + return otx2_cpt_send_mbox_msg(&cptpf->afpf_mbox, pdev); +} + +static int rx_inline_ipsec_lf_cfg(struct otx2_cptpf_dev *cptpf, u8 egrp, + struct otx2_cpt_rx_inline_lf_cfg *req) +{ + struct nix_inline_ipsec_cfg *nix_req; + struct pci_dev *pdev = cptpf->pdev; + int ret; + + nix_req = (struct nix_inline_ipsec_cfg *) + otx2_mbox_alloc_msg_rsp(&cptpf->afpf_mbox, 0, + sizeof(*nix_req), + sizeof(struct msg_rsp)); + if (nix_req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + memset(nix_req, 0, sizeof(*nix_req)); + nix_req->hdr.id = MBOX_MSG_NIX_INLINE_IPSEC_CFG; + nix_req->hdr.sig = OTX2_MBOX_REQ_SIG; + nix_req->enable = 1; + if (!req->credit || req->credit > OTX2_CPT_INST_QLEN_MSGS) + nix_req->cpt_credit = OTX2_CPT_INST_QLEN_MSGS - 1; + else + nix_req->cpt_credit = req->credit - 1; + nix_req->gen_cfg.egrp = egrp; + if (req->opcode) + nix_req->gen_cfg.opcode = req->opcode; + else + nix_req->gen_cfg.opcode = cpt_inline_rx_opcode(pdev); + nix_req->gen_cfg.param1 = req->param1; + nix_req->gen_cfg.param2 = req->param2; + nix_req->inst_qsel.cpt_pf_func = OTX2_CPT_RVU_PFFUNC(cptpf->pf_id, 0); + nix_req->inst_qsel.cpt_slot = 0; + ret = otx2_cpt_send_mbox_msg(&cptpf->afpf_mbox, pdev); + if (ret) + return ret; + + if (cptpf->has_cpt1) { + ret = send_inline_ipsec_inbound_msg(cptpf, req->sso_pf_func, 1); + if (ret) + return ret; + } + + return send_inline_ipsec_inbound_msg(cptpf, req->sso_pf_func, 0); +} + +static int handle_msg_rx_inline_ipsec_lf_cfg(struct otx2_cptpf_dev *cptpf, + struct mbox_msghdr *req) +{ + struct otx2_cpt_rx_inline_lf_cfg *cfg_req; + u8 egrp; + int ret; + + cfg_req = (struct otx2_cpt_rx_inline_lf_cfg *)req; + if (cptpf->lfs.lfs_num) { + dev_err(&cptpf->pdev->dev, + "LF is already configured for RX inline ipsec.\n"); + return -EEXIST; + } + /* + * Allow LFs to execute requests destined to only grp IE_TYPES and + * set queue priority of each LF to high + */ + egrp = otx2_cpt_get_eng_grp(&cptpf->eng_grps, OTX2_CPT_IE_TYPES); + if (egrp == OTX2_CPT_INVALID_CRYPTO_ENG_GRP) { + dev_err(&cptpf->pdev->dev, + "Engine group for inline ipsec is not available\n"); + return -ENOENT; + } + + otx2_cptlf_set_dev_info(&cptpf->lfs, cptpf->pdev, cptpf->reg_base, + &cptpf->afpf_mbox, BLKADDR_CPT0); + ret = otx2_cptlf_init(&cptpf->lfs, 1 << egrp, OTX2_CPT_QUEUE_HI_PRIO, + 1); + if (ret) { + dev_err(&cptpf->pdev->dev, + "LF configuration failed for RX inline ipsec.\n"); + return ret; + } + + if (cptpf->has_cpt1) { + cptpf->rsrc_req_blkaddr = BLKADDR_CPT1; + otx2_cptlf_set_dev_info(&cptpf->cpt1_lfs, cptpf->pdev, + cptpf->reg_base, &cptpf->afpf_mbox, + BLKADDR_CPT1); + ret = otx2_cptlf_init(&cptpf->cpt1_lfs, 1 << egrp, + OTX2_CPT_QUEUE_HI_PRIO, 1); + if (ret) { + dev_err(&cptpf->pdev->dev, + "LF configuration failed for RX inline ipsec.\n"); + goto lf_cleanup; + } + cptpf->rsrc_req_blkaddr = 0; + } + + ret = rx_inline_ipsec_lf_cfg(cptpf, egrp, cfg_req); + if (ret) + goto lf1_cleanup; + + return 0; + +lf1_cleanup: + otx2_cptlf_shutdown(&cptpf->cpt1_lfs); +lf_cleanup: + otx2_cptlf_shutdown(&cptpf->lfs); + return ret; +} + +static int cptpf_handle_vf_req(struct otx2_cptpf_dev *cptpf, + struct otx2_cptvf_info *vf, + struct mbox_msghdr *req, int size) +{ + int err = 0; + + /* Check if msg is valid, if not reply with an invalid msg */ + if (req->sig != OTX2_MBOX_REQ_SIG) + goto inval_msg; + + switch (req->id) { + case MBOX_MSG_GET_ENG_GRP_NUM: + err = handle_msg_get_eng_grp_num(cptpf, vf, req); + break; + case MBOX_MSG_GET_CAPS: + err = handle_msg_get_caps(cptpf, vf, req); + break; + case MBOX_MSG_GET_KVF_LIMITS: + err = handle_msg_kvf_limits(cptpf, vf, req); + break; + case MBOX_MSG_RX_INLINE_IPSEC_LF_CFG: + err = handle_msg_rx_inline_ipsec_lf_cfg(cptpf, req); + break; + + default: + err = forward_to_af(cptpf, vf, req, size); + break; + } + return err; + +inval_msg: + otx2_reply_invalid_msg(&cptpf->vfpf_mbox, vf->vf_id, 0, req->id); + otx2_mbox_msg_send(&cptpf->vfpf_mbox, vf->vf_id); + return err; +} + +irqreturn_t otx2_cptpf_vfpf_mbox_intr(int __always_unused irq, void *arg) +{ + struct otx2_cptpf_dev *cptpf = arg; + struct otx2_cptvf_info *vf; + int i, vf_idx; + u64 intr; + + /* + * Check which VF has raised an interrupt and schedule + * corresponding work queue to process the messages + */ + for (i = 0; i < 2; i++) { + /* Read the interrupt bits */ + intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, + RVU_PF_VFPF_MBOX_INTX(i)); + + for (vf_idx = i * 64; vf_idx < cptpf->enabled_vfs; vf_idx++) { + vf = &cptpf->vf[vf_idx]; + if (intr & (1ULL << vf->intr_idx)) { + queue_work(cptpf->vfpf_mbox_wq, + &vf->vfpf_mbox_work); + /* Clear the interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, + 0, RVU_PF_VFPF_MBOX_INTX(i), + BIT_ULL(vf->intr_idx)); + } + } + } + return IRQ_HANDLED; +} + +void otx2_cptpf_vfpf_mbox_handler(struct work_struct *work) +{ + struct otx2_cptpf_dev *cptpf; + struct otx2_cptvf_info *vf; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *req_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + int offset, i, err; + + vf = container_of(work, struct otx2_cptvf_info, vfpf_mbox_work); + cptpf = vf->cptpf; + mbox = &cptpf->vfpf_mbox; + /* sync with mbox memory region */ + smp_rmb(); + mdev = &mbox->dev[vf->vf_id]; + /* Process received mbox messages */ + req_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + offset = mbox->rx_start + ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN); + + for (i = 0; i < req_hdr->num_msgs; i++) { + msg = (struct mbox_msghdr *)(mdev->mbase + offset); + + /* Set which VF sent this message based on mbox IRQ */ + msg->pcifunc = ((u16)cptpf->pf_id << RVU_PFVF_PF_SHIFT) | + ((vf->vf_id + 1) & RVU_PFVF_FUNC_MASK); + + err = cptpf_handle_vf_req(cptpf, vf, msg, + msg->next_msgoff - offset); + /* + * Behave as the AF, drop the msg if there is + * no memory, timeout handling also goes here + */ + if (err == -ENOMEM || err == -EIO) + break; + offset = msg->next_msgoff; + /* Write barrier required for VF responses which are handled by + * PF driver and not forwarded to AF. + */ + smp_wmb(); + } + /* Send mbox responses to VF */ + if (mdev->num_msgs) + otx2_mbox_msg_send(mbox, vf->vf_id); +} + +irqreturn_t otx2_cptpf_afpf_mbox_intr(int __always_unused irq, void *arg) +{ + struct otx2_cptpf_dev *cptpf = arg; + struct otx2_mbox_dev *mdev; + struct otx2_mbox *mbox; + struct mbox_hdr *hdr; + u64 intr; + + /* Read the interrupt bits */ + intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT); + + if (intr & 0x1ULL) { + mbox = &cptpf->afpf_mbox; + mdev = &mbox->dev[0]; + hdr = mdev->mbase + mbox->rx_start; + if (hdr->num_msgs) + /* Schedule work queue function to process the MBOX request */ + queue_work(cptpf->afpf_mbox_wq, &cptpf->afpf_mbox_work); + + mbox = &cptpf->afpf_mbox_up; + mdev = &mbox->dev[0]; + hdr = mdev->mbase + mbox->rx_start; + if (hdr->num_msgs) + /* Schedule work queue function to process the MBOX request */ + queue_work(cptpf->afpf_mbox_wq, &cptpf->afpf_mbox_up_work); + /* Clear and ack the interrupt */ + otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_INT, + 0x1ULL); + } + return IRQ_HANDLED; +} + +static void process_afpf_mbox_msg(struct otx2_cptpf_dev *cptpf, + struct mbox_msghdr *msg) +{ + struct otx2_cptlfs_info *lfs = &cptpf->lfs; + struct device *dev = &cptpf->pdev->dev; + struct cpt_rd_wr_reg_msg *rsp_rd_wr; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(dev, "MBOX msg with unknown ID %d\n", msg->id); + return; + } + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(dev, "MBOX msg with wrong signature %x, ID %d\n", + msg->sig, msg->id); + return; + } + if (cptpf->rsrc_req_blkaddr == BLKADDR_CPT1) + lfs = &cptpf->cpt1_lfs; + + switch (msg->id) { + case MBOX_MSG_READY: + cptpf->pf_id = (msg->pcifunc >> RVU_PFVF_PF_SHIFT) & + RVU_PFVF_PF_MASK; + break; + case MBOX_MSG_CPT_RD_WR_REGISTER: + rsp_rd_wr = (struct cpt_rd_wr_reg_msg *)msg; + if (msg->rc) { + dev_err(dev, "Reg %llx rd/wr(%d) failed %d\n", + rsp_rd_wr->reg_offset, rsp_rd_wr->is_write, + msg->rc); + return; + } + if (!rsp_rd_wr->is_write) + *rsp_rd_wr->ret_val = rsp_rd_wr->val; + break; + case MBOX_MSG_ATTACH_RESOURCES: + if (!msg->rc) + lfs->are_lfs_attached = 1; + break; + case MBOX_MSG_DETACH_RESOURCES: + if (!msg->rc) + lfs->are_lfs_attached = 0; + break; + case MBOX_MSG_CPT_INLINE_IPSEC_CFG: + case MBOX_MSG_NIX_INLINE_IPSEC_CFG: + break; + + default: + dev_err(dev, + "Unsupported msg %d received.\n", msg->id); + break; + } +} + +static void forward_to_vf(struct otx2_cptpf_dev *cptpf, struct mbox_msghdr *msg, + int vf_id, int size) +{ + struct otx2_mbox *vfpf_mbox; + struct mbox_msghdr *fwd; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(&cptpf->pdev->dev, + "MBOX msg with unknown ID %d\n", msg->id); + return; + } + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(&cptpf->pdev->dev, + "MBOX msg with wrong signature %x, ID %d\n", + msg->sig, msg->id); + return; + } + vfpf_mbox = &cptpf->vfpf_mbox; + vf_id--; + if (vf_id >= cptpf->enabled_vfs) { + dev_err(&cptpf->pdev->dev, + "MBOX msg to unknown VF: %d >= %d\n", + vf_id, cptpf->enabled_vfs); + return; + } + if (msg->id == MBOX_MSG_VF_FLR) + return; + + fwd = otx2_mbox_alloc_msg(vfpf_mbox, vf_id, size); + if (!fwd) { + dev_err(&cptpf->pdev->dev, + "Forwarding to VF%d failed.\n", vf_id); + return; + } + memcpy((uint8_t *)fwd + sizeof(struct mbox_msghdr), + (uint8_t *)msg + sizeof(struct mbox_msghdr), size); + fwd->id = msg->id; + fwd->pcifunc = msg->pcifunc; + fwd->sig = msg->sig; + fwd->ver = msg->ver; + fwd->rc = msg->rc; +} + +/* Handle mailbox messages received from AF */ +void otx2_cptpf_afpf_mbox_handler(struct work_struct *work) +{ + struct otx2_cptpf_dev *cptpf; + struct otx2_mbox *afpf_mbox; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + int offset, vf_id, i; + + cptpf = container_of(work, struct otx2_cptpf_dev, afpf_mbox_work); + afpf_mbox = &cptpf->afpf_mbox; + mdev = &afpf_mbox->dev[0]; + /* Sync mbox data into memory */ + smp_wmb(); + + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + afpf_mbox->rx_start); + offset = ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (i = 0; i < rsp_hdr->num_msgs; i++) { + msg = (struct mbox_msghdr *)(mdev->mbase + afpf_mbox->rx_start + + offset); + vf_id = (msg->pcifunc >> RVU_PFVF_FUNC_SHIFT) & + RVU_PFVF_FUNC_MASK; + if (vf_id > 0) + forward_to_vf(cptpf, msg, vf_id, + msg->next_msgoff - offset); + else + process_afpf_mbox_msg(cptpf, msg); + + offset = msg->next_msgoff; + /* Sync VF response ready to be sent */ + smp_wmb(); + mdev->msgs_acked++; + } + otx2_mbox_reset(afpf_mbox, 0); +} + +static void handle_msg_cpt_inst_lmtst(struct otx2_cptpf_dev *cptpf, + struct mbox_msghdr *msg) +{ + struct cpt_inst_lmtst_req *req = (struct cpt_inst_lmtst_req *)msg; + struct otx2_cptlfs_info *lfs = &cptpf->lfs; + struct msg_rsp *rsp; + + if (cptpf->lfs.lfs_num) + lfs->ops->send_cmd((union otx2_cpt_inst_s *)req->inst, 1, + &lfs->lf[0]); + + rsp = (struct msg_rsp *)otx2_mbox_alloc_msg(&cptpf->afpf_mbox_up, 0, + sizeof(*rsp)); + if (!rsp) + return; + + rsp->hdr.id = msg->id; + rsp->hdr.sig = OTX2_MBOX_RSP_SIG; + rsp->hdr.pcifunc = 0; + rsp->hdr.rc = 0; +} + +static void process_afpf_mbox_up_msg(struct otx2_cptpf_dev *cptpf, + struct mbox_msghdr *msg) +{ + if (msg->id >= MBOX_MSG_MAX) { + dev_err(&cptpf->pdev->dev, + "MBOX msg with unknown ID %d\n", msg->id); + return; + } + + switch (msg->id) { + case MBOX_MSG_CPT_INST_LMTST: + handle_msg_cpt_inst_lmtst(cptpf, msg); + break; + default: + otx2_reply_invalid_msg(&cptpf->afpf_mbox_up, 0, 0, msg->id); + } +} + +void otx2_cptpf_afpf_mbox_up_handler(struct work_struct *work) +{ + struct otx2_cptpf_dev *cptpf; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + struct otx2_mbox *mbox; + int offset, i; + + cptpf = container_of(work, struct otx2_cptpf_dev, afpf_mbox_up_work); + mbox = &cptpf->afpf_mbox_up; + mdev = &mbox->dev[0]; + /* Sync mbox data into memory */ + smp_wmb(); + + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + mbox->rx_start); + offset = mbox->rx_start + ALIGN(sizeof(*rsp_hdr), MBOX_MSG_ALIGN); + + for (i = 0; i < rsp_hdr->num_msgs; i++) { + msg = (struct mbox_msghdr *)(mdev->mbase + offset); + + process_afpf_mbox_up_msg(cptpf, msg); + + offset = mbox->rx_start + msg->next_msgoff; + } + otx2_mbox_msg_send(mbox, 0); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c new file mode 100644 index 0000000000..1958b797a4 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c @@ -0,0 +1,1866 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include <linux/ctype.h> +#include <linux/firmware.h> +#include "otx2_cptpf_ucode.h" +#include "otx2_cpt_common.h" +#include "otx2_cptpf.h" +#include "otx2_cptlf.h" +#include "otx2_cpt_reqmgr.h" +#include "rvu_reg.h" + +#define CSR_DELAY 30 + +#define LOADFVC_RLEN 8 +#define LOADFVC_MAJOR_OP 0x01 +#define LOADFVC_MINOR_OP 0x08 + +#define CTX_FLUSH_TIMER_CNT 0xFFFFFF + +struct fw_info_t { + struct list_head ucodes; +}; + +static struct otx2_cpt_bitmap get_cores_bmap(struct device *dev, + struct otx2_cpt_eng_grp_info *eng_grp) +{ + struct otx2_cpt_bitmap bmap = { {0} }; + bool found = false; + int i; + + if (eng_grp->g->engs_num < 0 || + eng_grp->g->engs_num > OTX2_CPT_MAX_ENGINES) { + dev_err(dev, "unsupported number of engines %d on octeontx2\n", + eng_grp->g->engs_num); + return bmap; + } + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (eng_grp->engs[i].type) { + bitmap_or(bmap.bits, bmap.bits, + eng_grp->engs[i].bmap, + eng_grp->g->engs_num); + bmap.size = eng_grp->g->engs_num; + found = true; + } + } + + if (!found) + dev_err(dev, "No engines reserved for engine group %d\n", + eng_grp->idx); + return bmap; +} + +static int is_eng_type(int val, int eng_type) +{ + return val & (1 << eng_type); +} + +static int is_2nd_ucode_used(struct otx2_cpt_eng_grp_info *eng_grp) +{ + if (eng_grp->ucode[1].type) + return true; + else + return false; +} + +static void set_ucode_filename(struct otx2_cpt_ucode *ucode, + const char *filename) +{ + strscpy(ucode->filename, filename, OTX2_CPT_NAME_LENGTH); +} + +static char *get_eng_type_str(int eng_type) +{ + char *str = "unknown"; + + switch (eng_type) { + case OTX2_CPT_SE_TYPES: + str = "SE"; + break; + + case OTX2_CPT_IE_TYPES: + str = "IE"; + break; + + case OTX2_CPT_AE_TYPES: + str = "AE"; + break; + } + return str; +} + +static char *get_ucode_type_str(int ucode_type) +{ + char *str = "unknown"; + + switch (ucode_type) { + case (1 << OTX2_CPT_SE_TYPES): + str = "SE"; + break; + + case (1 << OTX2_CPT_IE_TYPES): + str = "IE"; + break; + + case (1 << OTX2_CPT_AE_TYPES): + str = "AE"; + break; + + case (1 << OTX2_CPT_SE_TYPES | 1 << OTX2_CPT_IE_TYPES): + str = "SE+IPSEC"; + break; + } + return str; +} + +static int get_ucode_type(struct device *dev, + struct otx2_cpt_ucode_hdr *ucode_hdr, + int *ucode_type) +{ + struct otx2_cptpf_dev *cptpf = dev_get_drvdata(dev); + char ver_str_prefix[OTX2_CPT_UCODE_VER_STR_SZ]; + char tmp_ver_str[OTX2_CPT_UCODE_VER_STR_SZ]; + struct pci_dev *pdev = cptpf->pdev; + int i, val = 0; + u8 nn; + + strscpy(tmp_ver_str, ucode_hdr->ver_str, OTX2_CPT_UCODE_VER_STR_SZ); + for (i = 0; i < strlen(tmp_ver_str); i++) + tmp_ver_str[i] = tolower(tmp_ver_str[i]); + + sprintf(ver_str_prefix, "ocpt-%02d", pdev->revision); + if (!strnstr(tmp_ver_str, ver_str_prefix, OTX2_CPT_UCODE_VER_STR_SZ)) + return -EINVAL; + + nn = ucode_hdr->ver_num.nn; + if (strnstr(tmp_ver_str, "se-", OTX2_CPT_UCODE_VER_STR_SZ) && + (nn == OTX2_CPT_SE_UC_TYPE1 || nn == OTX2_CPT_SE_UC_TYPE2 || + nn == OTX2_CPT_SE_UC_TYPE3)) + val |= 1 << OTX2_CPT_SE_TYPES; + if (strnstr(tmp_ver_str, "ie-", OTX2_CPT_UCODE_VER_STR_SZ) && + (nn == OTX2_CPT_IE_UC_TYPE1 || nn == OTX2_CPT_IE_UC_TYPE2 || + nn == OTX2_CPT_IE_UC_TYPE3)) + val |= 1 << OTX2_CPT_IE_TYPES; + if (strnstr(tmp_ver_str, "ae", OTX2_CPT_UCODE_VER_STR_SZ) && + nn == OTX2_CPT_AE_UC_TYPE) + val |= 1 << OTX2_CPT_AE_TYPES; + + *ucode_type = val; + + if (!val) + return -EINVAL; + + return 0; +} + +static int __write_ucode_base(struct otx2_cptpf_dev *cptpf, int eng, + dma_addr_t dma_addr, int blkaddr) +{ + return otx2_cpt_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_UCODE_BASE(eng), + (u64)dma_addr, blkaddr); +} + +static int cptx_set_ucode_base(struct otx2_cpt_eng_grp_info *eng_grp, + struct otx2_cptpf_dev *cptpf, int blkaddr) +{ + struct otx2_cpt_engs_rsvd *engs; + dma_addr_t dma_addr; + int i, bit, ret; + + /* Set PF number for microcode fetches */ + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_PF_FUNC, + cptpf->pf_id << RVU_PFVF_PF_SHIFT, blkaddr); + if (ret) + return ret; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + dma_addr = engs->ucode->dma; + + /* + * Set UCODE_BASE only for the cores which are not used, + * other cores should have already valid UCODE_BASE set + */ + for_each_set_bit(bit, engs->bmap, eng_grp->g->engs_num) + if (!eng_grp->g->eng_ref_cnt[bit]) { + ret = __write_ucode_base(cptpf, bit, dma_addr, + blkaddr); + if (ret) + return ret; + } + } + return 0; +} + +static int cpt_set_ucode_base(struct otx2_cpt_eng_grp_info *eng_grp, void *obj) +{ + struct otx2_cptpf_dev *cptpf = obj; + int ret; + + if (cptpf->has_cpt1) { + ret = cptx_set_ucode_base(eng_grp, cptpf, BLKADDR_CPT1); + if (ret) + return ret; + } + return cptx_set_ucode_base(eng_grp, cptpf, BLKADDR_CPT0); +} + +static int cptx_detach_and_disable_cores(struct otx2_cpt_eng_grp_info *eng_grp, + struct otx2_cptpf_dev *cptpf, + struct otx2_cpt_bitmap bmap, + int blkaddr) +{ + int i, timeout = 10; + int busy, ret; + u64 reg = 0; + + /* Detach the cores from group */ + for_each_set_bit(i, bmap.bits, bmap.size) { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL2(i), ®, blkaddr); + if (ret) + return ret; + + if (reg & (1ull << eng_grp->idx)) { + eng_grp->g->eng_ref_cnt[i]--; + reg &= ~(1ull << eng_grp->idx); + + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_CTL2(i), reg, + blkaddr); + if (ret) + return ret; + } + } + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + for_each_set_bit(i, bmap.bits, bmap.size) { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_STS(i), ®, + blkaddr); + if (ret) + return ret; + + if (reg & 0x1) { + busy = 1; + break; + } + } + } while (busy); + + /* Disable the cores only if they are not used anymore */ + for_each_set_bit(i, bmap.bits, bmap.size) { + if (!eng_grp->g->eng_ref_cnt[i]) { + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_CTL(i), 0x0, + blkaddr); + if (ret) + return ret; + } + } + + return 0; +} + +static int cpt_detach_and_disable_cores(struct otx2_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx2_cptpf_dev *cptpf = obj; + struct otx2_cpt_bitmap bmap; + int ret; + + bmap = get_cores_bmap(&cptpf->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + if (cptpf->has_cpt1) { + ret = cptx_detach_and_disable_cores(eng_grp, cptpf, bmap, + BLKADDR_CPT1); + if (ret) + return ret; + } + return cptx_detach_and_disable_cores(eng_grp, cptpf, bmap, + BLKADDR_CPT0); +} + +static int cptx_attach_and_enable_cores(struct otx2_cpt_eng_grp_info *eng_grp, + struct otx2_cptpf_dev *cptpf, + struct otx2_cpt_bitmap bmap, + int blkaddr) +{ + u64 reg = 0; + int i, ret; + + /* Attach the cores to the group */ + for_each_set_bit(i, bmap.bits, bmap.size) { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL2(i), ®, blkaddr); + if (ret) + return ret; + + if (!(reg & (1ull << eng_grp->idx))) { + eng_grp->g->eng_ref_cnt[i]++; + reg |= 1ull << eng_grp->idx; + + ret = otx2_cpt_write_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_CTL2(i), reg, + blkaddr); + if (ret) + return ret; + } + } + + /* Enable the cores */ + for_each_set_bit(i, bmap.bits, bmap.size) { + ret = otx2_cpt_add_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL(i), 0x1, + blkaddr); + if (ret) + return ret; + } + return otx2_cpt_send_af_reg_requests(&cptpf->afpf_mbox, cptpf->pdev); +} + +static int cpt_attach_and_enable_cores(struct otx2_cpt_eng_grp_info *eng_grp, + void *obj) +{ + struct otx2_cptpf_dev *cptpf = obj; + struct otx2_cpt_bitmap bmap; + int ret; + + bmap = get_cores_bmap(&cptpf->pdev->dev, eng_grp); + if (!bmap.size) + return -EINVAL; + + if (cptpf->has_cpt1) { + ret = cptx_attach_and_enable_cores(eng_grp, cptpf, bmap, + BLKADDR_CPT1); + if (ret) + return ret; + } + return cptx_attach_and_enable_cores(eng_grp, cptpf, bmap, BLKADDR_CPT0); +} + +static int load_fw(struct device *dev, struct fw_info_t *fw_info, + char *filename) +{ + struct otx2_cpt_ucode_hdr *ucode_hdr; + struct otx2_cpt_uc_info_t *uc_info; + int ucode_type, ucode_size; + int ret; + + uc_info = kzalloc(sizeof(*uc_info), GFP_KERNEL); + if (!uc_info) + return -ENOMEM; + + ret = request_firmware(&uc_info->fw, filename, dev); + if (ret) + goto free_uc_info; + + ucode_hdr = (struct otx2_cpt_ucode_hdr *)uc_info->fw->data; + ret = get_ucode_type(dev, ucode_hdr, &ucode_type); + if (ret) + goto release_fw; + + ucode_size = ntohl(ucode_hdr->code_length) * 2; + if (!ucode_size) { + dev_err(dev, "Ucode %s invalid size\n", filename); + ret = -EINVAL; + goto release_fw; + } + + set_ucode_filename(&uc_info->ucode, filename); + memcpy(uc_info->ucode.ver_str, ucode_hdr->ver_str, + OTX2_CPT_UCODE_VER_STR_SZ); + uc_info->ucode.ver_num = ucode_hdr->ver_num; + uc_info->ucode.type = ucode_type; + uc_info->ucode.size = ucode_size; + list_add_tail(&uc_info->list, &fw_info->ucodes); + + return 0; + +release_fw: + release_firmware(uc_info->fw); +free_uc_info: + kfree(uc_info); + return ret; +} + +static void cpt_ucode_release_fw(struct fw_info_t *fw_info) +{ + struct otx2_cpt_uc_info_t *curr, *temp; + + if (!fw_info) + return; + + list_for_each_entry_safe(curr, temp, &fw_info->ucodes, list) { + list_del(&curr->list); + release_firmware(curr->fw); + kfree(curr); + } +} + +static struct otx2_cpt_uc_info_t *get_ucode(struct fw_info_t *fw_info, + int ucode_type) +{ + struct otx2_cpt_uc_info_t *curr; + + list_for_each_entry(curr, &fw_info->ucodes, list) { + if (!is_eng_type(curr->ucode.type, ucode_type)) + continue; + + return curr; + } + return NULL; +} + +static void print_uc_info(struct fw_info_t *fw_info) +{ + struct otx2_cpt_uc_info_t *curr; + + list_for_each_entry(curr, &fw_info->ucodes, list) { + pr_debug("Ucode filename %s\n", curr->ucode.filename); + pr_debug("Ucode version string %s\n", curr->ucode.ver_str); + pr_debug("Ucode version %d.%d.%d.%d\n", + curr->ucode.ver_num.nn, curr->ucode.ver_num.xx, + curr->ucode.ver_num.yy, curr->ucode.ver_num.zz); + pr_debug("Ucode type (%d) %s\n", curr->ucode.type, + get_ucode_type_str(curr->ucode.type)); + pr_debug("Ucode size %d\n", curr->ucode.size); + pr_debug("Ucode ptr %p\n", curr->fw->data); + } +} + +static int cpt_ucode_load_fw(struct pci_dev *pdev, struct fw_info_t *fw_info) +{ + char filename[OTX2_CPT_NAME_LENGTH]; + char eng_type[8] = {0}; + int ret, e, i; + + INIT_LIST_HEAD(&fw_info->ucodes); + + for (e = 1; e < OTX2_CPT_MAX_ENG_TYPES; e++) { + strcpy(eng_type, get_eng_type_str(e)); + for (i = 0; i < strlen(eng_type); i++) + eng_type[i] = tolower(eng_type[i]); + + snprintf(filename, sizeof(filename), "mrvl/cpt%02d/%s.out", + pdev->revision, eng_type); + /* Request firmware for each engine type */ + ret = load_fw(&pdev->dev, fw_info, filename); + if (ret) + goto release_fw; + } + print_uc_info(fw_info); + return 0; + +release_fw: + cpt_ucode_release_fw(fw_info); + return ret; +} + +struct otx2_cpt_engs_rsvd *find_engines_by_type( + struct otx2_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + int i; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + if (eng_grp->engs[i].type == eng_type) + return &eng_grp->engs[i]; + } + return NULL; +} + +static int eng_grp_has_eng_type(struct otx2_cpt_eng_grp_info *eng_grp, + int eng_type) +{ + struct otx2_cpt_engs_rsvd *engs; + + engs = find_engines_by_type(eng_grp, eng_type); + + return (engs != NULL ? 1 : 0); +} + +static int update_engines_avail_count(struct device *dev, + struct otx2_cpt_engs_available *avail, + struct otx2_cpt_engs_rsvd *engs, int val) +{ + switch (engs->type) { + case OTX2_CPT_SE_TYPES: + avail->se_cnt += val; + break; + + case OTX2_CPT_IE_TYPES: + avail->ie_cnt += val; + break; + + case OTX2_CPT_AE_TYPES: + avail->ae_cnt += val; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + return 0; +} + +static int update_engines_offset(struct device *dev, + struct otx2_cpt_engs_available *avail, + struct otx2_cpt_engs_rsvd *engs) +{ + switch (engs->type) { + case OTX2_CPT_SE_TYPES: + engs->offset = 0; + break; + + case OTX2_CPT_IE_TYPES: + engs->offset = avail->max_se_cnt; + break; + + case OTX2_CPT_AE_TYPES: + engs->offset = avail->max_se_cnt + avail->max_ie_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + return 0; +} + +static int release_engines(struct device *dev, + struct otx2_cpt_eng_grp_info *grp) +{ + int i, ret = 0; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) + continue; + + if (grp->engs[i].count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, + &grp->engs[i], + grp->engs[i].count); + if (ret) + return ret; + } + + grp->engs[i].type = 0; + grp->engs[i].count = 0; + grp->engs[i].offset = 0; + grp->engs[i].ucode = NULL; + bitmap_zero(grp->engs[i].bmap, grp->g->engs_num); + } + return 0; +} + +static int do_reserve_engines(struct device *dev, + struct otx2_cpt_eng_grp_info *grp, + struct otx2_cpt_engines *req_engs) +{ + struct otx2_cpt_engs_rsvd *engs = NULL; + int i, ret; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!grp->engs[i].type) { + engs = &grp->engs[i]; + break; + } + } + + if (!engs) + return -ENOMEM; + + engs->type = req_engs->type; + engs->count = req_engs->count; + + ret = update_engines_offset(dev, &grp->g->avail, engs); + if (ret) + return ret; + + if (engs->count > 0) { + ret = update_engines_avail_count(dev, &grp->g->avail, engs, + -engs->count); + if (ret) + return ret; + } + + return 0; +} + +static int check_engines_availability(struct device *dev, + struct otx2_cpt_eng_grp_info *grp, + struct otx2_cpt_engines *req_eng) +{ + int avail_cnt = 0; + + switch (req_eng->type) { + case OTX2_CPT_SE_TYPES: + avail_cnt = grp->g->avail.se_cnt; + break; + + case OTX2_CPT_IE_TYPES: + avail_cnt = grp->g->avail.ie_cnt; + break; + + case OTX2_CPT_AE_TYPES: + avail_cnt = grp->g->avail.ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", req_eng->type); + return -EINVAL; + } + + if (avail_cnt < req_eng->count) { + dev_err(dev, + "Error available %s engines %d < than requested %d\n", + get_eng_type_str(req_eng->type), + avail_cnt, req_eng->count); + return -EBUSY; + } + return 0; +} + +static int reserve_engines(struct device *dev, + struct otx2_cpt_eng_grp_info *grp, + struct otx2_cpt_engines *req_engs, int ucodes_cnt) +{ + int i, ret = 0; + + /* Validate if a number of requested engines are available */ + for (i = 0; i < ucodes_cnt; i++) { + ret = check_engines_availability(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + + /* Reserve requested engines for this engine group */ + for (i = 0; i < ucodes_cnt; i++) { + ret = do_reserve_engines(dev, grp, &req_engs[i]); + if (ret) + return ret; + } + return 0; +} + +static void ucode_unload(struct device *dev, struct otx2_cpt_ucode *ucode) +{ + if (ucode->va) { + dma_free_coherent(dev, OTX2_CPT_UCODE_SZ, ucode->va, + ucode->dma); + ucode->va = NULL; + ucode->dma = 0; + ucode->size = 0; + } + + memset(&ucode->ver_str, 0, OTX2_CPT_UCODE_VER_STR_SZ); + memset(&ucode->ver_num, 0, sizeof(struct otx2_cpt_ucode_ver_num)); + set_ucode_filename(ucode, ""); + ucode->type = 0; +} + +static int copy_ucode_to_dma_mem(struct device *dev, + struct otx2_cpt_ucode *ucode, + const u8 *ucode_data) +{ + u32 i; + + /* Allocate DMAable space */ + ucode->va = dma_alloc_coherent(dev, OTX2_CPT_UCODE_SZ, &ucode->dma, + GFP_KERNEL); + if (!ucode->va) + return -ENOMEM; + + memcpy(ucode->va, ucode_data + sizeof(struct otx2_cpt_ucode_hdr), + ucode->size); + + /* Byte swap 64-bit */ + for (i = 0; i < (ucode->size / 8); i++) + cpu_to_be64s(&((u64 *)ucode->va)[i]); + /* Ucode needs 16-bit swap */ + for (i = 0; i < (ucode->size / 2); i++) + cpu_to_be16s(&((u16 *)ucode->va)[i]); + return 0; +} + +static int enable_eng_grp(struct otx2_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int ret; + + /* Point microcode to each core of the group */ + ret = cpt_set_ucode_base(eng_grp, obj); + if (ret) + return ret; + + /* Attach the cores to the group and enable them */ + ret = cpt_attach_and_enable_cores(eng_grp, obj); + + return ret; +} + +static int disable_eng_grp(struct device *dev, + struct otx2_cpt_eng_grp_info *eng_grp, + void *obj) +{ + int i, ret; + + /* Disable all engines used by this group */ + ret = cpt_detach_and_disable_cores(eng_grp, obj); + if (ret) + return ret; + + /* Unload ucode used by this engine group */ + ucode_unload(dev, &eng_grp->ucode[0]); + ucode_unload(dev, &eng_grp->ucode[1]); + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + if (!eng_grp->engs[i].type) + continue; + + eng_grp->engs[i].ucode = &eng_grp->ucode[0]; + } + + /* Clear UCODE_BASE register for each engine used by this group */ + ret = cpt_set_ucode_base(eng_grp, obj); + + return ret; +} + +static void setup_eng_grp_mirroring(struct otx2_cpt_eng_grp_info *dst_grp, + struct otx2_cpt_eng_grp_info *src_grp) +{ + /* Setup fields for engine group which is mirrored */ + src_grp->mirror.is_ena = false; + src_grp->mirror.idx = 0; + src_grp->mirror.ref_count++; + + /* Setup fields for mirroring engine group */ + dst_grp->mirror.is_ena = true; + dst_grp->mirror.idx = src_grp->idx; + dst_grp->mirror.ref_count = 0; +} + +static void remove_eng_grp_mirroring(struct otx2_cpt_eng_grp_info *dst_grp) +{ + struct otx2_cpt_eng_grp_info *src_grp; + + if (!dst_grp->mirror.is_ena) + return; + + src_grp = &dst_grp->g->grp[dst_grp->mirror.idx]; + + src_grp->mirror.ref_count--; + dst_grp->mirror.is_ena = false; + dst_grp->mirror.idx = 0; + dst_grp->mirror.ref_count = 0; +} + +static void update_requested_engs(struct otx2_cpt_eng_grp_info *mirror_eng_grp, + struct otx2_cpt_engines *engs, int engs_cnt) +{ + struct otx2_cpt_engs_rsvd *mirrored_engs; + int i; + + for (i = 0; i < engs_cnt; i++) { + mirrored_engs = find_engines_by_type(mirror_eng_grp, + engs[i].type); + if (!mirrored_engs) + continue; + + /* + * If mirrored group has this type of engines attached then + * there are 3 scenarios possible: + * 1) mirrored_engs.count == engs[i].count then all engines + * from mirrored engine group will be shared with this engine + * group + * 2) mirrored_engs.count > engs[i].count then only a subset of + * engines from mirrored engine group will be shared with this + * engine group + * 3) mirrored_engs.count < engs[i].count then all engines + * from mirrored engine group will be shared with this group + * and additional engines will be reserved for exclusively use + * by this engine group + */ + engs[i].count -= mirrored_engs->count; + } +} + +static struct otx2_cpt_eng_grp_info *find_mirrored_eng_grp( + struct otx2_cpt_eng_grp_info *grp) +{ + struct otx2_cpt_eng_grps *eng_grps = grp->g; + int i; + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + continue; + if (eng_grps->grp[i].ucode[0].type && + eng_grps->grp[i].ucode[1].type) + continue; + if (grp->idx == i) + continue; + if (!strncasecmp(eng_grps->grp[i].ucode[0].ver_str, + grp->ucode[0].ver_str, + OTX2_CPT_UCODE_VER_STR_SZ)) + return &eng_grps->grp[i]; + } + + return NULL; +} + +static struct otx2_cpt_eng_grp_info *find_unused_eng_grp( + struct otx2_cpt_eng_grps *eng_grps) +{ + int i; + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + if (!eng_grps->grp[i].is_enabled) + return &eng_grps->grp[i]; + } + return NULL; +} + +static int eng_grp_update_masks(struct device *dev, + struct otx2_cpt_eng_grp_info *eng_grp) +{ + struct otx2_cpt_engs_rsvd *engs, *mirrored_engs; + struct otx2_cpt_bitmap tmp_bmap = { {0} }; + int i, j, cnt, max_cnt; + int bit; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (engs->count <= 0) + continue; + + switch (engs->type) { + case OTX2_CPT_SE_TYPES: + max_cnt = eng_grp->g->avail.max_se_cnt; + break; + + case OTX2_CPT_IE_TYPES: + max_cnt = eng_grp->g->avail.max_ie_cnt; + break; + + case OTX2_CPT_AE_TYPES: + max_cnt = eng_grp->g->avail.max_ae_cnt; + break; + + default: + dev_err(dev, "Invalid engine type %d\n", engs->type); + return -EINVAL; + } + + cnt = engs->count; + WARN_ON(engs->offset + max_cnt > OTX2_CPT_MAX_ENGINES); + bitmap_zero(tmp_bmap.bits, eng_grp->g->engs_num); + for (j = engs->offset; j < engs->offset + max_cnt; j++) { + if (!eng_grp->g->eng_ref_cnt[j]) { + bitmap_set(tmp_bmap.bits, j, 1); + cnt--; + if (!cnt) + break; + } + } + + if (cnt) + return -ENOSPC; + + bitmap_copy(engs->bmap, tmp_bmap.bits, eng_grp->g->engs_num); + } + + if (!eng_grp->mirror.is_ena) + return 0; + + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + WARN_ON(!mirrored_engs && engs->count <= 0); + if (!mirrored_engs) + continue; + + bitmap_copy(tmp_bmap.bits, mirrored_engs->bmap, + eng_grp->g->engs_num); + if (engs->count < 0) { + bit = find_first_bit(mirrored_engs->bmap, + eng_grp->g->engs_num); + bitmap_clear(tmp_bmap.bits, bit, -engs->count); + } + bitmap_or(engs->bmap, engs->bmap, tmp_bmap.bits, + eng_grp->g->engs_num); + } + return 0; +} + +static int delete_engine_group(struct device *dev, + struct otx2_cpt_eng_grp_info *eng_grp) +{ + int ret; + + if (!eng_grp->is_enabled) + return 0; + + if (eng_grp->mirror.ref_count) + return -EINVAL; + + /* Removing engine group mirroring if enabled */ + remove_eng_grp_mirroring(eng_grp); + + /* Disable engine group */ + ret = disable_eng_grp(dev, eng_grp, eng_grp->g->obj); + if (ret) + return ret; + + /* Release all engines held by this engine group */ + ret = release_engines(dev, eng_grp); + if (ret) + return ret; + + eng_grp->is_enabled = false; + + return 0; +} + +static void update_ucode_ptrs(struct otx2_cpt_eng_grp_info *eng_grp) +{ + struct otx2_cpt_ucode *ucode; + + if (eng_grp->mirror.is_ena) + ucode = &eng_grp->g->grp[eng_grp->mirror.idx].ucode[0]; + else + ucode = &eng_grp->ucode[0]; + WARN_ON(!eng_grp->engs[0].type); + eng_grp->engs[0].ucode = ucode; + + if (eng_grp->engs[1].type) { + if (is_2nd_ucode_used(eng_grp)) + eng_grp->engs[1].ucode = &eng_grp->ucode[1]; + else + eng_grp->engs[1].ucode = ucode; + } +} + +static int create_engine_group(struct device *dev, + struct otx2_cpt_eng_grps *eng_grps, + struct otx2_cpt_engines *engs, int ucodes_cnt, + void *ucode_data[], int is_print) +{ + struct otx2_cpt_eng_grp_info *mirrored_eng_grp; + struct otx2_cpt_eng_grp_info *eng_grp; + struct otx2_cpt_uc_info_t *uc_info; + int i, ret = 0; + + /* Find engine group which is not used */ + eng_grp = find_unused_eng_grp(eng_grps); + if (!eng_grp) { + dev_err(dev, "Error all engine groups are being used\n"); + return -ENOSPC; + } + /* Load ucode */ + for (i = 0; i < ucodes_cnt; i++) { + uc_info = (struct otx2_cpt_uc_info_t *) ucode_data[i]; + eng_grp->ucode[i] = uc_info->ucode; + ret = copy_ucode_to_dma_mem(dev, &eng_grp->ucode[i], + uc_info->fw->data); + if (ret) + goto unload_ucode; + } + + /* Check if this group mirrors another existing engine group */ + mirrored_eng_grp = find_mirrored_eng_grp(eng_grp); + if (mirrored_eng_grp) { + /* Setup mirroring */ + setup_eng_grp_mirroring(eng_grp, mirrored_eng_grp); + + /* + * Update count of requested engines because some + * of them might be shared with mirrored group + */ + update_requested_engs(mirrored_eng_grp, engs, ucodes_cnt); + } + ret = reserve_engines(dev, eng_grp, engs, ucodes_cnt); + if (ret) + goto unload_ucode; + + /* Update ucode pointers used by engines */ + update_ucode_ptrs(eng_grp); + + /* Update engine masks used by this group */ + ret = eng_grp_update_masks(dev, eng_grp); + if (ret) + goto release_engs; + + /* Enable engine group */ + ret = enable_eng_grp(eng_grp, eng_grps->obj); + if (ret) + goto release_engs; + + /* + * If this engine group mirrors another engine group + * then we need to unload ucode as we will use ucode + * from mirrored engine group + */ + if (eng_grp->mirror.is_ena) + ucode_unload(dev, &eng_grp->ucode[0]); + + eng_grp->is_enabled = true; + + if (!is_print) + return 0; + + if (mirrored_eng_grp) + dev_info(dev, + "Engine_group%d: reuse microcode %s from group %d\n", + eng_grp->idx, mirrored_eng_grp->ucode[0].ver_str, + mirrored_eng_grp->idx); + else + dev_info(dev, "Engine_group%d: microcode loaded %s\n", + eng_grp->idx, eng_grp->ucode[0].ver_str); + if (is_2nd_ucode_used(eng_grp)) + dev_info(dev, "Engine_group%d: microcode loaded %s\n", + eng_grp->idx, eng_grp->ucode[1].ver_str); + + return 0; + +release_engs: + release_engines(dev, eng_grp); +unload_ucode: + ucode_unload(dev, &eng_grp->ucode[0]); + ucode_unload(dev, &eng_grp->ucode[1]); + return ret; +} + +static void delete_engine_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + int i; + + /* First delete all mirroring engine groups */ + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) + if (eng_grps->grp[i].mirror.is_ena) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); + + /* Delete remaining engine groups */ + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) + delete_engine_group(&pdev->dev, &eng_grps->grp[i]); +} + +#define PCI_DEVID_CN10K_RNM 0xA098 +#define RNM_ENTROPY_STATUS 0x8 + +static void rnm_to_cpt_errata_fixup(struct device *dev) +{ + struct pci_dev *pdev; + void __iomem *base; + int timeout = 5000; + + pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, PCI_DEVID_CN10K_RNM, NULL); + if (!pdev) + return; + + base = pci_ioremap_bar(pdev, 0); + if (!base) + goto put_pdev; + + while ((readq(base + RNM_ENTROPY_STATUS) & 0x7F) != 0x40) { + cpu_relax(); + udelay(1); + timeout--; + if (!timeout) { + dev_warn(dev, "RNM is not producing entropy\n"); + break; + } + } + + iounmap(base); + +put_pdev: + pci_dev_put(pdev); +} + +int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type) +{ + + int eng_grp_num = OTX2_CPT_INVALID_CRYPTO_ENG_GRP; + struct otx2_cpt_eng_grp_info *grp; + int i; + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + if (!grp->is_enabled) + continue; + + if (eng_type == OTX2_CPT_SE_TYPES) { + if (eng_grp_has_eng_type(grp, eng_type) && + !eng_grp_has_eng_type(grp, OTX2_CPT_IE_TYPES)) { + eng_grp_num = i; + break; + } + } else { + if (eng_grp_has_eng_type(grp, eng_type)) { + eng_grp_num = i; + break; + } + } + } + return eng_grp_num; +} + +int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, + struct otx2_cpt_eng_grps *eng_grps) +{ + struct otx2_cpt_uc_info_t *uc_info[OTX2_CPT_MAX_ETYPES_PER_GRP] = { }; + struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct pci_dev *pdev = cptpf->pdev; + struct fw_info_t fw_info; + u64 reg_val; + int ret = 0; + + mutex_lock(&eng_grps->lock); + /* + * We don't create engine groups if it was already + * made (when user enabled VFs for the first time) + */ + if (eng_grps->is_grps_created) + goto unlock; + + ret = cpt_ucode_load_fw(pdev, &fw_info); + if (ret) + goto unlock; + + /* + * Create engine group with SE engines for kernel + * crypto functionality (symmetric crypto) + */ + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_SE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for SE\n"); + ret = -EINVAL; + goto release_fw; + } + engs[0].type = OTX2_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 1); + if (ret) + goto release_fw; + + /* + * Create engine group with SE+IE engines for IPSec. + * All SE engines will be shared with engine group 0. + */ + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_SE_TYPES); + uc_info[1] = get_ucode(&fw_info, OTX2_CPT_IE_TYPES); + + if (uc_info[1] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for IE"); + ret = -EINVAL; + goto delete_eng_grp; + } + engs[0].type = OTX2_CPT_SE_TYPES; + engs[0].count = eng_grps->avail.max_se_cnt; + engs[1].type = OTX2_CPT_IE_TYPES; + engs[1].count = eng_grps->avail.max_ie_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 2, + (void **) uc_info, 1); + if (ret) + goto delete_eng_grp; + + /* + * Create engine group with AE engines for asymmetric + * crypto functionality. + */ + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_AE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for AE"); + ret = -EINVAL; + goto delete_eng_grp; + } + engs[0].type = OTX2_CPT_AE_TYPES; + engs[0].count = eng_grps->avail.max_ae_cnt; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 1); + if (ret) + goto delete_eng_grp; + + eng_grps->is_grps_created = true; + + cpt_ucode_release_fw(&fw_info); + + if (is_dev_otx2(pdev)) + goto unlock; + + /* + * Ensure RNM_ENTROPY_STATUS[NORMAL_CNT] = 0x40 before writing + * CPT_AF_CTL[RNM_REQ_EN] = 1 as a workaround for HW errata. + */ + rnm_to_cpt_errata_fixup(&pdev->dev); + + /* + * Configure engine group mask to allow context prefetching + * for the groups and enable random number request, to enable + * CPT to request random numbers from RNM. + */ + otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL, + OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16), + BLKADDR_CPT0); + /* + * Set interval to periodically flush dirty data for the next + * CTX cache entry. Set the interval count to maximum supported + * value. + */ + otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTX_FLUSH_TIMER, + CTX_FLUSH_TIMER_CNT, BLKADDR_CPT0); + + /* + * Set CPT_AF_DIAG[FLT_DIS], as a workaround for HW errata, when + * CPT_AF_DIAG[FLT_DIS] = 0 and a CPT engine access to LLC/DRAM + * encounters a fault/poison, a rare case may result in + * unpredictable data being delivered to a CPT engine. + */ + otx2_cpt_read_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG, ®_val, + BLKADDR_CPT0); + otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_DIAG, + reg_val | BIT_ULL(24), BLKADDR_CPT0); + + mutex_unlock(&eng_grps->lock); + return 0; + +delete_eng_grp: + delete_engine_grps(pdev, eng_grps); +release_fw: + cpt_ucode_release_fw(&fw_info); +unlock: + mutex_unlock(&eng_grps->lock); + return ret; +} + +static int cptx_disable_all_cores(struct otx2_cptpf_dev *cptpf, int total_cores, + int blkaddr) +{ + int timeout = 10, ret; + int i, busy; + u64 reg; + + /* Disengage the cores from groups */ + for (i = 0; i < total_cores; i++) { + ret = otx2_cpt_add_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL2(i), 0x0, + blkaddr); + if (ret) + return ret; + + cptpf->eng_grps.eng_ref_cnt[i] = 0; + } + ret = otx2_cpt_send_af_reg_requests(&cptpf->afpf_mbox, cptpf->pdev); + if (ret) + return ret; + + /* Wait for cores to become idle */ + do { + busy = 0; + usleep_range(10000, 20000); + if (timeout-- < 0) + return -EBUSY; + + for (i = 0; i < total_cores; i++) { + ret = otx2_cpt_read_af_reg(&cptpf->afpf_mbox, + cptpf->pdev, + CPT_AF_EXEX_STS(i), ®, + blkaddr); + if (ret) + return ret; + + if (reg & 0x1) { + busy = 1; + break; + } + } + } while (busy); + + /* Disable the cores */ + for (i = 0; i < total_cores; i++) { + ret = otx2_cpt_add_write_af_reg(&cptpf->afpf_mbox, cptpf->pdev, + CPT_AF_EXEX_CTL(i), 0x0, + blkaddr); + if (ret) + return ret; + } + return otx2_cpt_send_af_reg_requests(&cptpf->afpf_mbox, cptpf->pdev); +} + +int otx2_cpt_disable_all_cores(struct otx2_cptpf_dev *cptpf) +{ + int total_cores, ret; + + total_cores = cptpf->eng_grps.avail.max_se_cnt + + cptpf->eng_grps.avail.max_ie_cnt + + cptpf->eng_grps.avail.max_ae_cnt; + + if (cptpf->has_cpt1) { + ret = cptx_disable_all_cores(cptpf, total_cores, BLKADDR_CPT1); + if (ret) + return ret; + } + return cptx_disable_all_cores(cptpf, total_cores, BLKADDR_CPT0); +} + +void otx2_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + struct otx2_cpt_eng_grp_info *grp; + int i, j; + + mutex_lock(&eng_grps->lock); + delete_engine_grps(pdev, eng_grps); + /* Release memory */ + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + for (j = 0; j < OTX2_CPT_MAX_ETYPES_PER_GRP; j++) { + kfree(grp->engs[j].bmap); + grp->engs[j].bmap = NULL; + } + } + mutex_unlock(&eng_grps->lock); +} + +int otx2_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + struct otx2_cpt_eng_grp_info *grp; + int i, j, ret; + + mutex_init(&eng_grps->lock); + eng_grps->obj = pci_get_drvdata(pdev); + eng_grps->avail.se_cnt = eng_grps->avail.max_se_cnt; + eng_grps->avail.ie_cnt = eng_grps->avail.max_ie_cnt; + eng_grps->avail.ae_cnt = eng_grps->avail.max_ae_cnt; + + eng_grps->engs_num = eng_grps->avail.max_se_cnt + + eng_grps->avail.max_ie_cnt + + eng_grps->avail.max_ae_cnt; + if (eng_grps->engs_num > OTX2_CPT_MAX_ENGINES) { + dev_err(&pdev->dev, + "Number of engines %d > than max supported %d\n", + eng_grps->engs_num, OTX2_CPT_MAX_ENGINES); + ret = -EINVAL; + goto cleanup_eng_grps; + } + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + grp->g = eng_grps; + grp->idx = i; + + for (j = 0; j < OTX2_CPT_MAX_ETYPES_PER_GRP; j++) { + grp->engs[j].bmap = + kcalloc(BITS_TO_LONGS(eng_grps->engs_num), + sizeof(long), GFP_KERNEL); + if (!grp->engs[j].bmap) { + ret = -ENOMEM; + goto cleanup_eng_grps; + } + } + } + return 0; + +cleanup_eng_grps: + otx2_cpt_cleanup_eng_grps(pdev, eng_grps); + return ret; +} + +static int create_eng_caps_discovery_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps) +{ + struct otx2_cpt_uc_info_t *uc_info[OTX2_CPT_MAX_ETYPES_PER_GRP] = { }; + struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} }; + struct fw_info_t fw_info; + int ret; + + mutex_lock(&eng_grps->lock); + ret = cpt_ucode_load_fw(pdev, &fw_info); + if (ret) { + mutex_unlock(&eng_grps->lock); + return ret; + } + + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_AE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for AE\n"); + ret = -EINVAL; + goto release_fw; + } + engs[0].type = OTX2_CPT_AE_TYPES; + engs[0].count = 2; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 0); + if (ret) + goto release_fw; + + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_SE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for SE\n"); + ret = -EINVAL; + goto delete_eng_grp; + } + engs[0].type = OTX2_CPT_SE_TYPES; + engs[0].count = 2; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 0); + if (ret) + goto delete_eng_grp; + + uc_info[0] = get_ucode(&fw_info, OTX2_CPT_IE_TYPES); + if (uc_info[0] == NULL) { + dev_err(&pdev->dev, "Unable to find firmware for IE\n"); + ret = -EINVAL; + goto delete_eng_grp; + } + engs[0].type = OTX2_CPT_IE_TYPES; + engs[0].count = 2; + + ret = create_engine_group(&pdev->dev, eng_grps, engs, 1, + (void **) uc_info, 0); + if (ret) + goto delete_eng_grp; + + cpt_ucode_release_fw(&fw_info); + mutex_unlock(&eng_grps->lock); + return 0; + +delete_eng_grp: + delete_engine_grps(pdev, eng_grps); +release_fw: + cpt_ucode_release_fw(&fw_info); + mutex_unlock(&eng_grps->lock); + return ret; +} + +/* + * Get CPT HW capabilities using LOAD_FVC operation. + */ +int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf) +{ + struct otx2_cptlfs_info *lfs = &cptpf->lfs; + struct otx2_cpt_iq_command iq_cmd; + union otx2_cpt_opcode opcode; + union otx2_cpt_res_s *result; + union otx2_cpt_inst_s inst; + dma_addr_t rptr_baddr; + struct pci_dev *pdev; + u32 len, compl_rlen; + int ret, etype; + void *rptr; + + /* + * We don't get capabilities if it was already done + * (when user enabled VFs for the first time) + */ + if (cptpf->is_eng_caps_discovered) + return 0; + + pdev = cptpf->pdev; + /* + * Create engine groups for each type to submit LOAD_FVC op and + * get engine's capabilities. + */ + ret = create_eng_caps_discovery_grps(pdev, &cptpf->eng_grps); + if (ret) + goto delete_grps; + + otx2_cptlf_set_dev_info(lfs, cptpf->pdev, cptpf->reg_base, + &cptpf->afpf_mbox, BLKADDR_CPT0); + ret = otx2_cptlf_init(lfs, OTX2_CPT_ALL_ENG_GRPS_MASK, + OTX2_CPT_QUEUE_HI_PRIO, 1); + if (ret) + goto delete_grps; + + compl_rlen = ALIGN(sizeof(union otx2_cpt_res_s), OTX2_CPT_DMA_MINALIGN); + len = compl_rlen + LOADFVC_RLEN; + + result = kzalloc(len, GFP_KERNEL); + if (!result) { + ret = -ENOMEM; + goto lf_cleanup; + } + rptr_baddr = dma_map_single(&pdev->dev, (void *)result, len, + DMA_BIDIRECTIONAL); + if (dma_mapping_error(&pdev->dev, rptr_baddr)) { + dev_err(&pdev->dev, "DMA mapping failed\n"); + ret = -EFAULT; + goto free_result; + } + rptr = (u8 *)result + compl_rlen; + + /* Fill in the command */ + opcode.s.major = LOADFVC_MAJOR_OP; + opcode.s.minor = LOADFVC_MINOR_OP; + + iq_cmd.cmd.u = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(opcode.flags); + + /* 64-bit swap for microcode data reads, not needed for addresses */ + cpu_to_be64s(&iq_cmd.cmd.u); + iq_cmd.dptr = 0; + iq_cmd.rptr = rptr_baddr + compl_rlen; + iq_cmd.cptr.u = 0; + + for (etype = 1; etype < OTX2_CPT_MAX_ENG_TYPES; etype++) { + result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT; + iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps, + etype); + otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr); + lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]); + + while (lfs->ops->cpt_get_compcode(result) == + OTX2_CPT_COMPLETION_CODE_INIT) + cpu_relax(); + + cptpf->eng_caps[etype].u = be64_to_cpup(rptr); + } + dma_unmap_single(&pdev->dev, rptr_baddr, len, DMA_BIDIRECTIONAL); + cptpf->is_eng_caps_discovered = true; + +free_result: + kfree(result); +lf_cleanup: + otx2_cptlf_shutdown(lfs); +delete_grps: + delete_engine_grps(pdev, &cptpf->eng_grps); + + return ret; +} + +int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf, + struct devlink_param_gset_ctx *ctx) +{ + struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { { 0 } }; + struct otx2_cpt_uc_info_t *uc_info[OTX2_CPT_MAX_ETYPES_PER_GRP] = {}; + struct otx2_cpt_eng_grps *eng_grps = &cptpf->eng_grps; + char *ucode_filename[OTX2_CPT_MAX_ETYPES_PER_GRP]; + char tmp_buf[OTX2_CPT_NAME_LENGTH] = { 0 }; + struct device *dev = &cptpf->pdev->dev; + char *start, *val, *err_msg, *tmp; + int grp_idx = 0, ret = -EINVAL; + bool has_se, has_ie, has_ae; + struct fw_info_t fw_info; + int ucode_idx = 0; + + if (!eng_grps->is_grps_created) { + dev_err(dev, "Not allowed before creating the default groups\n"); + return -EINVAL; + } + err_msg = "Invalid engine group format"; + strscpy(tmp_buf, ctx->val.vstr, strlen(ctx->val.vstr) + 1); + start = tmp_buf; + + has_se = has_ie = has_ae = false; + + for (;;) { + val = strsep(&start, ";"); + if (!val) + break; + val = strim(val); + if (!*val) + continue; + + if (!strncasecmp(val, "se", 2) && strchr(val, ':')) { + if (has_se || ucode_idx) + goto err_print; + tmp = strsep(&val, ":"); + if (!tmp) + goto err_print; + tmp = strim(tmp); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX2_CPT_SE_TYPES; + has_se = true; + } else if (!strncasecmp(val, "ae", 2) && strchr(val, ':')) { + if (has_ae || ucode_idx) + goto err_print; + tmp = strsep(&val, ":"); + if (!tmp) + goto err_print; + tmp = strim(tmp); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX2_CPT_AE_TYPES; + has_ae = true; + } else if (!strncasecmp(val, "ie", 2) && strchr(val, ':')) { + if (has_ie || ucode_idx) + goto err_print; + tmp = strsep(&val, ":"); + if (!tmp) + goto err_print; + tmp = strim(tmp); + if (!val) + goto err_print; + if (strlen(tmp) != 2) + goto err_print; + if (kstrtoint(strim(val), 10, &engs[grp_idx].count)) + goto err_print; + engs[grp_idx++].type = OTX2_CPT_IE_TYPES; + has_ie = true; + } else { + if (ucode_idx > 1) + goto err_print; + if (!strlen(val)) + goto err_print; + if (strnstr(val, " ", strlen(val))) + goto err_print; + ucode_filename[ucode_idx++] = val; + } + } + + /* Validate input parameters */ + if (!(grp_idx && ucode_idx)) + goto err_print; + + if (ucode_idx > 1 && grp_idx < 2) + goto err_print; + + if (grp_idx > OTX2_CPT_MAX_ETYPES_PER_GRP) { + err_msg = "Error max 2 engine types can be attached"; + goto err_print; + } + + if (grp_idx > 1) { + if ((engs[0].type + engs[1].type) != + (OTX2_CPT_SE_TYPES + OTX2_CPT_IE_TYPES)) { + err_msg = "Only combination of SE+IE engines is allowed"; + goto err_print; + } + /* Keep SE engines at zero index */ + if (engs[1].type == OTX2_CPT_SE_TYPES) + swap(engs[0], engs[1]); + } + mutex_lock(&eng_grps->lock); + + if (cptpf->enabled_vfs) { + dev_err(dev, "Disable VFs before modifying engine groups\n"); + ret = -EACCES; + goto err_unlock; + } + INIT_LIST_HEAD(&fw_info.ucodes); + ret = load_fw(dev, &fw_info, ucode_filename[0]); + if (ret) { + dev_err(dev, "Unable to load firmware %s\n", ucode_filename[0]); + goto err_unlock; + } + if (ucode_idx > 1) { + ret = load_fw(dev, &fw_info, ucode_filename[1]); + if (ret) { + dev_err(dev, "Unable to load firmware %s\n", + ucode_filename[1]); + goto release_fw; + } + } + uc_info[0] = get_ucode(&fw_info, engs[0].type); + if (uc_info[0] == NULL) { + dev_err(dev, "Unable to find firmware for %s\n", + get_eng_type_str(engs[0].type)); + ret = -EINVAL; + goto release_fw; + } + if (ucode_idx > 1) { + uc_info[1] = get_ucode(&fw_info, engs[1].type); + if (uc_info[1] == NULL) { + dev_err(dev, "Unable to find firmware for %s\n", + get_eng_type_str(engs[1].type)); + ret = -EINVAL; + goto release_fw; + } + } + ret = create_engine_group(dev, eng_grps, engs, grp_idx, + (void **)uc_info, 1); + +release_fw: + cpt_ucode_release_fw(&fw_info); +err_unlock: + mutex_unlock(&eng_grps->lock); + return ret; +err_print: + dev_err(dev, "%s\n", err_msg); + return ret; +} + +int otx2_cpt_dl_custom_egrp_delete(struct otx2_cptpf_dev *cptpf, + struct devlink_param_gset_ctx *ctx) +{ + struct otx2_cpt_eng_grps *eng_grps = &cptpf->eng_grps; + struct device *dev = &cptpf->pdev->dev; + char *tmp, *err_msg; + int egrp; + int ret; + + err_msg = "Invalid input string format(ex: egrp:0)"; + if (strncasecmp(ctx->val.vstr, "egrp", 4)) + goto err_print; + tmp = ctx->val.vstr; + strsep(&tmp, ":"); + if (!tmp) + goto err_print; + if (kstrtoint(tmp, 10, &egrp)) + goto err_print; + + if (egrp < 0 || egrp >= OTX2_CPT_MAX_ENGINE_GROUPS) { + dev_err(dev, "Invalid engine group %d", egrp); + return -EINVAL; + } + if (!eng_grps->grp[egrp].is_enabled) { + dev_err(dev, "Error engine_group%d is not configured", egrp); + return -EINVAL; + } + mutex_lock(&eng_grps->lock); + ret = delete_engine_group(dev, &eng_grps->grp[egrp]); + mutex_unlock(&eng_grps->lock); + + return ret; + +err_print: + dev_err(dev, "%s\n", err_msg); + return -EINVAL; +} + +static void get_engs_info(struct otx2_cpt_eng_grp_info *eng_grp, char *buf, + int size, int idx) +{ + struct otx2_cpt_engs_rsvd *mirrored_engs = NULL; + struct otx2_cpt_engs_rsvd *engs; + int len, i; + + buf[0] = '\0'; + for (i = 0; i < OTX2_CPT_MAX_ETYPES_PER_GRP; i++) { + engs = &eng_grp->engs[i]; + if (!engs->type) + continue; + if (idx != -1 && idx != i) + continue; + + if (eng_grp->mirror.is_ena) + mirrored_engs = find_engines_by_type( + &eng_grp->g->grp[eng_grp->mirror.idx], + engs->type); + if (i > 0 && idx == -1) { + len = strlen(buf); + scnprintf(buf + len, size - len, ", "); + } + + len = strlen(buf); + scnprintf(buf + len, size - len, "%d %s ", + mirrored_engs ? engs->count + mirrored_engs->count : + engs->count, + get_eng_type_str(engs->type)); + if (mirrored_engs) { + len = strlen(buf); + scnprintf(buf + len, size - len, + "(%d shared with engine_group%d) ", + engs->count <= 0 ? + engs->count + mirrored_engs->count : + mirrored_engs->count, + eng_grp->mirror.idx); + } + } +} + +void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf) +{ + struct otx2_cpt_eng_grps *eng_grps = &cptpf->eng_grps; + struct otx2_cpt_eng_grp_info *mirrored_grp; + char engs_info[2 * OTX2_CPT_NAME_LENGTH]; + struct otx2_cpt_eng_grp_info *grp; + struct otx2_cpt_engs_rsvd *engs; + int i, j; + + pr_debug("Engine groups global info"); + pr_debug("max SE %d, max IE %d, max AE %d", eng_grps->avail.max_se_cnt, + eng_grps->avail.max_ie_cnt, eng_grps->avail.max_ae_cnt); + pr_debug("free SE %d", eng_grps->avail.se_cnt); + pr_debug("free IE %d", eng_grps->avail.ie_cnt); + pr_debug("free AE %d", eng_grps->avail.ae_cnt); + + for (i = 0; i < OTX2_CPT_MAX_ENGINE_GROUPS; i++) { + grp = &eng_grps->grp[i]; + pr_debug("engine_group%d, state %s", i, + grp->is_enabled ? "enabled" : "disabled"); + if (grp->is_enabled) { + mirrored_grp = &eng_grps->grp[grp->mirror.idx]; + pr_debug("Ucode0 filename %s, version %s", + grp->mirror.is_ena ? + mirrored_grp->ucode[0].filename : + grp->ucode[0].filename, + grp->mirror.is_ena ? + mirrored_grp->ucode[0].ver_str : + grp->ucode[0].ver_str); + if (is_2nd_ucode_used(grp)) + pr_debug("Ucode1 filename %s, version %s", + grp->ucode[1].filename, + grp->ucode[1].ver_str); + } + + for (j = 0; j < OTX2_CPT_MAX_ETYPES_PER_GRP; j++) { + engs = &grp->engs[j]; + if (engs->type) { + u32 mask[5] = { }; + + get_engs_info(grp, engs_info, + 2 * OTX2_CPT_NAME_LENGTH, j); + pr_debug("Slot%d: %s", j, engs_info); + bitmap_to_arr32(mask, engs->bmap, + eng_grps->engs_num); + if (is_dev_otx2(cptpf->pdev)) + pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x", + mask[3], mask[2], mask[1], + mask[0]); + else + pr_debug("Mask: %8.8x %8.8x %8.8x %8.8x %8.8x", + mask[4], mask[3], mask[2], mask[1], + mask[0]); + } + } + } +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h new file mode 100644 index 0000000000..e69320a54b --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h @@ -0,0 +1,172 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPTPF_UCODE_H +#define __OTX2_CPTPF_UCODE_H + +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/module.h> +#include "otx2_cpt_hw_types.h" +#include "otx2_cpt_common.h" + +/* + * On OcteonTX2 platform IPSec ucode can use both IE and SE engines therefore + * IE and SE engines can be attached to the same engine group. + */ +#define OTX2_CPT_MAX_ETYPES_PER_GRP 2 + +/* CPT ucode signature size */ +#define OTX2_CPT_UCODE_SIGN_LEN 256 + +/* Microcode version string length */ +#define OTX2_CPT_UCODE_VER_STR_SZ 44 + +/* Maximum number of supported engines/cores on OcteonTX2/CN10K platform */ +#define OTX2_CPT_MAX_ENGINES 144 + +#define OTX2_CPT_ENGS_BITMASK_LEN BITS_TO_LONGS(OTX2_CPT_MAX_ENGINES) + +#define OTX2_CPT_UCODE_SZ (64 * 1024) + +/* Microcode types */ +enum otx2_cpt_ucode_type { + OTX2_CPT_AE_UC_TYPE = 1, /* AE-MAIN */ + OTX2_CPT_SE_UC_TYPE1 = 20,/* SE-MAIN - combination of 21 and 22 */ + OTX2_CPT_SE_UC_TYPE2 = 21,/* Fast Path IPSec + AirCrypto */ + OTX2_CPT_SE_UC_TYPE3 = 22,/* + * Hash + HMAC + FlexiCrypto + RNG + + * Full Feature IPSec + AirCrypto + Kasumi + */ + OTX2_CPT_IE_UC_TYPE1 = 30, /* IE-MAIN - combination of 31 and 32 */ + OTX2_CPT_IE_UC_TYPE2 = 31, /* Fast Path IPSec */ + OTX2_CPT_IE_UC_TYPE3 = 32, /* + * Hash + HMAC + FlexiCrypto + RNG + + * Full Future IPSec + */ +}; + +struct otx2_cpt_bitmap { + unsigned long bits[OTX2_CPT_ENGS_BITMASK_LEN]; + int size; +}; + +struct otx2_cpt_engines { + int type; + int count; +}; + +/* Microcode version number */ +struct otx2_cpt_ucode_ver_num { + u8 nn; + u8 xx; + u8 yy; + u8 zz; +}; + +struct otx2_cpt_ucode_hdr { + struct otx2_cpt_ucode_ver_num ver_num; + u8 ver_str[OTX2_CPT_UCODE_VER_STR_SZ]; + __be32 code_length; + u32 padding[3]; +}; + +struct otx2_cpt_ucode { + u8 ver_str[OTX2_CPT_UCODE_VER_STR_SZ];/* + * ucode version in readable + * format + */ + struct otx2_cpt_ucode_ver_num ver_num;/* ucode version number */ + char filename[OTX2_CPT_NAME_LENGTH];/* ucode filename */ + dma_addr_t dma; /* phys address of ucode image */ + void *va; /* virt address of ucode image */ + u32 size; /* ucode image size */ + int type; /* ucode image type SE, IE, AE or SE+IE */ +}; + +struct otx2_cpt_uc_info_t { + struct list_head list; + struct otx2_cpt_ucode ucode;/* microcode information */ + const struct firmware *fw; +}; + +/* Maximum and current number of engines available for all engine groups */ +struct otx2_cpt_engs_available { + int max_se_cnt; + int max_ie_cnt; + int max_ae_cnt; + int se_cnt; + int ie_cnt; + int ae_cnt; +}; + +/* Engines reserved to an engine group */ +struct otx2_cpt_engs_rsvd { + int type; /* engine type */ + int count; /* number of engines attached */ + int offset; /* constant offset of engine type in the bitmap */ + unsigned long *bmap; /* attached engines bitmap */ + struct otx2_cpt_ucode *ucode; /* ucode used by these engines */ +}; + +struct otx2_cpt_mirror_info { + int is_ena; /* + * is mirroring enabled, it is set only for engine + * group which mirrors another engine group + */ + int idx; /* + * index of engine group which is mirrored by this + * group, set only for engine group which mirrors + * another group + */ + int ref_count; /* + * number of times this engine group is mirrored by + * other groups, this is set only for engine group + * which is mirrored by other group(s) + */ +}; + +struct otx2_cpt_eng_grp_info { + struct otx2_cpt_eng_grps *g; /* pointer to engine_groups structure */ + /* engines attached */ + struct otx2_cpt_engs_rsvd engs[OTX2_CPT_MAX_ETYPES_PER_GRP]; + /* ucodes information */ + struct otx2_cpt_ucode ucode[OTX2_CPT_MAX_ETYPES_PER_GRP]; + /* engine group mirroring information */ + struct otx2_cpt_mirror_info mirror; + int idx; /* engine group index */ + bool is_enabled; /* + * is engine group enabled, engine group is enabled + * when it has engines attached and ucode loaded + */ +}; + +struct otx2_cpt_eng_grps { + struct mutex lock; + struct otx2_cpt_eng_grp_info grp[OTX2_CPT_MAX_ENGINE_GROUPS]; + struct otx2_cpt_engs_available avail; + void *obj; /* device specific data */ + int engs_num; /* total number of engines supported */ + u8 eng_ref_cnt[OTX2_CPT_MAX_ENGINES];/* engines reference count */ + bool is_grps_created; /* Is the engine groups are already created */ +}; +struct otx2_cptpf_dev; +int otx2_cpt_init_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps); +void otx2_cpt_cleanup_eng_grps(struct pci_dev *pdev, + struct otx2_cpt_eng_grps *eng_grps); +int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf, + struct otx2_cpt_eng_grps *eng_grps); +int otx2_cpt_disable_all_cores(struct otx2_cptpf_dev *cptpf); +int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type); +int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf); +int otx2_cpt_dl_custom_egrp_create(struct otx2_cptpf_dev *cptpf, + struct devlink_param_gset_ctx *ctx); +int otx2_cpt_dl_custom_egrp_delete(struct otx2_cptpf_dev *cptpf, + struct devlink_param_gset_ctx *ctx); +void otx2_cpt_print_uc_dbg_info(struct otx2_cptpf_dev *cptpf); +struct otx2_cpt_engs_rsvd *find_engines_by_type( + struct otx2_cpt_eng_grp_info *eng_grp, + int eng_type); +#endif /* __OTX2_CPTPF_UCODE_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h new file mode 100644 index 0000000000..994291e90d --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPTVF_H +#define __OTX2_CPTVF_H + +#include "mbox.h" +#include "otx2_cptlf.h" + +struct otx2_cptvf_dev { + void __iomem *reg_base; /* Register start address */ + void __iomem *pfvf_mbox_base; /* PF-VF mbox start address */ + struct pci_dev *pdev; /* PCI device handle */ + struct otx2_cptlfs_info lfs; /* CPT LFs attached to this VF */ + u8 vf_id; /* Virtual function index */ + + /* PF <=> VF mbox */ + struct otx2_mbox pfvf_mbox; + struct work_struct pfvf_mbox_work; + struct workqueue_struct *pfvf_mbox_wq; + int blkaddr; + void *bbuf_base; + unsigned long cap_flag; +}; + +irqreturn_t otx2_cptvf_pfvf_mbox_intr(int irq, void *arg); +void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work); +int otx2_cptvf_send_eng_grp_num_msg(struct otx2_cptvf_dev *cptvf, int eng_type); +int otx2_cptvf_send_kvf_limits_msg(struct otx2_cptvf_dev *cptvf); +int otx2_cpt_mbox_bbuf_init(struct otx2_cptvf_dev *cptvf, struct pci_dev *pdev); + +#endif /* __OTX2_CPTVF_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c new file mode 100644 index 0000000000..e27ddd3c4e --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -0,0 +1,1749 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include <crypto/aes.h> +#include <crypto/authenc.h> +#include <crypto/cryptd.h> +#include <crypto/des.h> +#include <crypto/internal/aead.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> +#include <crypto/xts.h> +#include <crypto/gcm.h> +#include <crypto/scatterwalk.h> +#include <linux/rtnetlink.h> +#include <linux/sort.h> +#include <linux/module.h> +#include "otx2_cptvf.h" +#include "otx2_cptvf_algs.h" +#include "otx2_cpt_reqmgr.h" + +/* Size of salt in AES GCM mode */ +#define AES_GCM_SALT_SIZE 4 +/* Size of IV in AES GCM mode */ +#define AES_GCM_IV_SIZE 8 +/* Size of ICV (Integrity Check Value) in AES GCM mode */ +#define AES_GCM_ICV_SIZE 16 +/* Offset of IV in AES GCM mode */ +#define AES_GCM_IV_OFFSET 8 +#define CONTROL_WORD_LEN 8 +#define KEY2_OFFSET 48 +#define DMA_MODE_FLAG(dma_mode) \ + (((dma_mode) == OTX2_CPT_DMA_MODE_SG) ? (1 << 7) : 0) + +/* Truncated SHA digest size */ +#define SHA1_TRUNC_DIGEST_SIZE 12 +#define SHA256_TRUNC_DIGEST_SIZE 16 +#define SHA384_TRUNC_DIGEST_SIZE 24 +#define SHA512_TRUNC_DIGEST_SIZE 32 + +static DEFINE_MUTEX(mutex); +static int is_crypto_registered; + +struct cpt_device_desc { + struct pci_dev *dev; + int num_queues; +}; + +struct cpt_device_table { + atomic_t count; + struct cpt_device_desc desc[OTX2_CPT_MAX_LFS_NUM]; +}; + +static struct cpt_device_table se_devices = { + .count = ATOMIC_INIT(0) +}; + +static inline int get_se_device(struct pci_dev **pdev, int *cpu_num) +{ + int count; + + count = atomic_read(&se_devices.count); + if (count < 1) + return -ENODEV; + + *cpu_num = get_cpu(); + /* + * On OcteonTX2 platform CPT instruction queue is bound to each + * local function LF, in turn LFs can be attached to PF + * or VF therefore we always use first device. We get maximum + * performance if one CPT queue is available for each cpu + * otherwise CPT queues need to be shared between cpus. + */ + if (*cpu_num >= se_devices.desc[0].num_queues) + *cpu_num %= se_devices.desc[0].num_queues; + *pdev = se_devices.desc[0].dev; + + put_cpu(); + + return 0; +} + +static inline int validate_hmac_cipher_null(struct otx2_cpt_req_info *cpt_req) +{ + struct otx2_cpt_req_ctx *rctx; + struct aead_request *req; + struct crypto_aead *tfm; + + req = container_of(cpt_req->areq, struct aead_request, base); + tfm = crypto_aead_reqtfm(req); + rctx = aead_request_ctx_dma(req); + if (memcmp(rctx->fctx.hmac.s.hmac_calc, + rctx->fctx.hmac.s.hmac_recv, + crypto_aead_authsize(tfm)) != 0) + return -EBADMSG; + + return 0; +} + +static void otx2_cpt_aead_callback(int status, void *arg1, void *arg2) +{ + struct otx2_cpt_inst_info *inst_info = arg2; + struct crypto_async_request *areq = arg1; + struct otx2_cpt_req_info *cpt_req; + struct pci_dev *pdev; + + if (inst_info) { + cpt_req = inst_info->req; + if (!status) { + /* + * When selected cipher is NULL we need to manually + * verify whether calculated hmac value matches + * received hmac value + */ + if (cpt_req->req_type == + OTX2_CPT_AEAD_ENC_DEC_NULL_REQ && + !cpt_req->is_enc) + status = validate_hmac_cipher_null(cpt_req); + } + pdev = inst_info->pdev; + otx2_cpt_info_destroy(pdev, inst_info); + } + if (areq) + crypto_request_complete(areq, status); +} + +static void output_iv_copyback(struct crypto_async_request *areq) +{ + struct otx2_cpt_req_info *req_info; + struct otx2_cpt_req_ctx *rctx; + struct skcipher_request *sreq; + struct crypto_skcipher *stfm; + struct otx2_cpt_enc_ctx *ctx; + u32 start, ivsize; + + sreq = container_of(areq, struct skcipher_request, base); + stfm = crypto_skcipher_reqtfm(sreq); + ctx = crypto_skcipher_ctx(stfm); + if (ctx->cipher_type == OTX2_CPT_AES_CBC || + ctx->cipher_type == OTX2_CPT_DES3_CBC) { + rctx = skcipher_request_ctx_dma(sreq); + req_info = &rctx->cpt_req; + ivsize = crypto_skcipher_ivsize(stfm); + start = sreq->cryptlen - ivsize; + + if (req_info->is_enc) { + scatterwalk_map_and_copy(sreq->iv, sreq->dst, start, + ivsize, 0); + } else { + if (sreq->src != sreq->dst) { + scatterwalk_map_and_copy(sreq->iv, sreq->src, + start, ivsize, 0); + } else { + memcpy(sreq->iv, req_info->iv_out, ivsize); + kfree(req_info->iv_out); + } + } + } +} + +static void otx2_cpt_skcipher_callback(int status, void *arg1, void *arg2) +{ + struct otx2_cpt_inst_info *inst_info = arg2; + struct crypto_async_request *areq = arg1; + struct pci_dev *pdev; + + if (areq) { + if (!status) + output_iv_copyback(areq); + if (inst_info) { + pdev = inst_info->pdev; + otx2_cpt_info_destroy(pdev, inst_info); + } + crypto_request_complete(areq, status); + } +} + +static inline void update_input_data(struct otx2_cpt_req_info *req_info, + struct scatterlist *inp_sg, + u32 nbytes, u32 *argcnt) +{ + req_info->req.dlen += nbytes; + + while (nbytes) { + u32 len = (nbytes < inp_sg->length) ? nbytes : inp_sg->length; + u8 *ptr = sg_virt(inp_sg); + + req_info->in[*argcnt].vptr = (void *)ptr; + req_info->in[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + inp_sg = sg_next(inp_sg); + } +} + +static inline void update_output_data(struct otx2_cpt_req_info *req_info, + struct scatterlist *outp_sg, + u32 offset, u32 nbytes, u32 *argcnt) +{ + u32 len, sg_len; + u8 *ptr; + + req_info->rlen += nbytes; + + while (nbytes) { + sg_len = outp_sg->length - offset; + len = (nbytes < sg_len) ? nbytes : sg_len; + ptr = sg_virt(outp_sg); + + req_info->out[*argcnt].vptr = (void *) (ptr + offset); + req_info->out[*argcnt].size = len; + nbytes -= len; + ++(*argcnt); + offset = 0; + outp_sg = sg_next(outp_sg); + } +} + +static inline int create_ctx_hdr(struct skcipher_request *req, u32 enc, + u32 *argcnt) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + struct otx2_cpt_fc_ctx *fctx = &rctx->fctx; + int ivsize = crypto_skcipher_ivsize(stfm); + u32 start = req->cryptlen - ivsize; + gfp_t flags; + + flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC; + req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG; + req_info->ctrl.s.se_req = 1; + + req_info->req.opcode.s.major = OTX2_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX2_CPT_DMA_MODE_SG); + if (enc) { + req_info->req.opcode.s.minor = 2; + } else { + req_info->req.opcode.s.minor = 3; + if ((ctx->cipher_type == OTX2_CPT_AES_CBC || + ctx->cipher_type == OTX2_CPT_DES3_CBC) && + req->src == req->dst) { + req_info->iv_out = kmalloc(ivsize, flags); + if (!req_info->iv_out) + return -ENOMEM; + + scatterwalk_map_and_copy(req_info->iv_out, req->src, + start, ivsize, 0); + } + } + /* Encryption data length */ + req_info->req.param1 = req->cryptlen; + /* Authentication data length */ + req_info->req.param2 = 0; + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.iv_source = OTX2_CPT_FROM_CPTR; + + if (ctx->cipher_type == OTX2_CPT_AES_XTS) + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len * 2); + else + memcpy(fctx->enc.encr_key, ctx->enc_key, ctx->key_len); + + memcpy(fctx->enc.encr_iv, req->iv, crypto_skcipher_ivsize(stfm)); + + cpu_to_be64s(&fctx->enc.enc_ctrl.u); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx2_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx2_cpt_fc_ctx); + + ++(*argcnt); + + return 0; +} + +static inline int create_input_list(struct skcipher_request *req, u32 enc, + u32 enc_iv_len) +{ + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + int ret; + + ret = create_ctx_hdr(req, enc, &argcnt); + if (ret) + return ret; + + update_input_data(req_info, req->src, req->cryptlen, &argcnt); + req_info->in_cnt = argcnt; + + return 0; +} + +static inline void create_output_list(struct skcipher_request *req, + u32 enc_iv_len) +{ + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0; + + /* + * OUTPUT Buffer Processing + * AES encryption/decryption output would be + * received in the following format + * + * ------IV--------|------ENCRYPTED/DECRYPTED DATA-----| + * [ 16 Bytes/ [ Request Enc/Dec/ DATA Len AES CBC ] + */ + update_output_data(req_info, req->dst, 0, req->cryptlen, &argcnt); + req_info->out_cnt = argcnt; +} + +static int skcipher_do_fallback(struct skcipher_request *req, bool is_enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); + int ret; + + if (ctx->fbk_cipher) { + skcipher_request_set_tfm(&rctx->sk_fbk_req, ctx->fbk_cipher); + skcipher_request_set_callback(&rctx->sk_fbk_req, + req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(&rctx->sk_fbk_req, req->src, + req->dst, req->cryptlen, req->iv); + ret = is_enc ? crypto_skcipher_encrypt(&rctx->sk_fbk_req) : + crypto_skcipher_decrypt(&rctx->sk_fbk_req); + } else { + ret = -EINVAL; + } + return ret; +} + +static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) +{ + struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 enc_iv_len = crypto_skcipher_ivsize(stfm); + struct pci_dev *pdev; + int status, cpu_num; + + if (req->cryptlen == 0) + return 0; + + if (!IS_ALIGNED(req->cryptlen, ctx->enc_align_len)) + return -EINVAL; + + if (req->cryptlen > OTX2_CPT_MAX_REQ_SIZE) + return skcipher_do_fallback(req, enc); + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.u = 0; + + status = create_input_list(req, enc, enc_iv_len); + if (status) + return status; + create_output_list(req, enc_iv_len); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->callback = otx2_cpt_skcipher_callback; + req_info->areq = &req->base; + req_info->req_type = OTX2_CPT_ENC_DEC_REQ; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + req_info->ctrl.s.grp = otx2_cpt_get_kcrypto_eng_grp_num(pdev); + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + status = otx2_cpt_do_request(pdev, req_info, cpu_num); + + return status; +} + +static int otx2_cpt_skcipher_encrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, true); +} + +static int otx2_cpt_skcipher_decrypt(struct skcipher_request *req) +{ + return cpt_enc_dec(req, false); +} + +static int otx2_cpt_skcipher_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + const u8 *key2 = key + (keylen / 2); + const u8 *key1 = key; + int ret; + + ret = xts_verify_key(tfm, key, keylen); + if (ret) + return ret; + ctx->key_len = keylen; + ctx->enc_align_len = 1; + memcpy(ctx->enc_key, key1, keylen / 2); + memcpy(ctx->enc_key + KEY2_OFFSET, key2, keylen / 2); + ctx->cipher_type = OTX2_CPT_AES_XTS; + switch (ctx->key_len) { + case 2 * AES_KEYSIZE_128: + ctx->key_type = OTX2_CPT_AES_128_BIT; + break; + case 2 * AES_KEYSIZE_192: + ctx->key_type = OTX2_CPT_AES_192_BIT; + break; + case 2 * AES_KEYSIZE_256: + ctx->key_type = OTX2_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + return crypto_skcipher_setkey(ctx->fbk_cipher, key, keylen); +} + +static int cpt_des_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (keylen != DES3_EDE_KEY_SIZE) + return -EINVAL; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + ctx->enc_align_len = 8; + + memcpy(ctx->enc_key, key, keylen); + + return crypto_skcipher_setkey(ctx->fbk_cipher, key, keylen); +} + +static int cpt_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + u32 keylen, u8 cipher_type) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (keylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX2_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX2_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX2_CPT_AES_256_BIT; + break; + default: + return -EINVAL; + } + if (cipher_type == OTX2_CPT_AES_CBC || cipher_type == OTX2_CPT_AES_ECB) + ctx->enc_align_len = 16; + else + ctx->enc_align_len = 1; + + ctx->key_len = keylen; + ctx->cipher_type = cipher_type; + + memcpy(ctx->enc_key, key, keylen); + + return crypto_skcipher_setkey(ctx->fbk_cipher, key, keylen); +} + +static int otx2_cpt_skcipher_cbc_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX2_CPT_AES_CBC); +} + +static int otx2_cpt_skcipher_ecb_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_aes_setkey(tfm, key, keylen, OTX2_CPT_AES_ECB); +} + +static int otx2_cpt_skcipher_cbc_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX2_CPT_DES3_CBC); +} + +static int otx2_cpt_skcipher_ecb_des3_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + return cpt_des_setkey(tfm, key, keylen, OTX2_CPT_DES3_ECB); +} + +static int cpt_skcipher_fallback_init(struct otx2_cpt_enc_ctx *ctx, + struct crypto_alg *alg) +{ + if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { + ctx->fbk_cipher = + crypto_alloc_skcipher(alg->cra_name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fbk_cipher)) { + pr_err("%s() failed to allocate fallback for %s\n", + __func__, alg->cra_name); + return PTR_ERR(ctx->fbk_cipher); + } + } + return 0; +} + +static int otx2_cpt_enc_dec_init(struct crypto_skcipher *stfm) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); + struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); + struct crypto_alg *alg = tfm->__crt_alg; + + memset(ctx, 0, sizeof(*ctx)); + /* + * Additional memory for skcipher_request is + * allocated since the cryptd daemon uses + * this memory for request_ctx information + */ + crypto_skcipher_set_reqsize_dma( + stfm, sizeof(struct otx2_cpt_req_ctx) + + sizeof(struct skcipher_request)); + + return cpt_skcipher_fallback_init(ctx, alg); +} + +static void otx2_cpt_skcipher_exit(struct crypto_skcipher *tfm) +{ + struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (ctx->fbk_cipher) { + crypto_free_skcipher(ctx->fbk_cipher); + ctx->fbk_cipher = NULL; + } +} + +static int cpt_aead_fallback_init(struct otx2_cpt_aead_ctx *ctx, + struct crypto_alg *alg) +{ + if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { + ctx->fbk_cipher = + crypto_alloc_aead(alg->cra_name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fbk_cipher)) { + pr_err("%s() failed to allocate fallback for %s\n", + __func__, alg->cra_name); + return PTR_ERR(ctx->fbk_cipher); + } + } + return 0; +} + +static int cpt_aead_init(struct crypto_aead *atfm, u8 cipher_type, u8 mac_type) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(atfm); + struct crypto_tfm *tfm = crypto_aead_tfm(atfm); + struct crypto_alg *alg = tfm->__crt_alg; + + ctx->cipher_type = cipher_type; + ctx->mac_type = mac_type; + + /* + * When selected cipher is NULL we use HMAC opcode instead of + * FLEXICRYPTO opcode therefore we don't need to use HASH algorithms + * for calculating ipad and opad + */ + if (ctx->cipher_type != OTX2_CPT_CIPHER_NULL) { + switch (ctx->mac_type) { + case OTX2_CPT_SHA1: + ctx->hashalg = crypto_alloc_shash("sha1", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX2_CPT_SHA256: + ctx->hashalg = crypto_alloc_shash("sha256", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX2_CPT_SHA384: + ctx->hashalg = crypto_alloc_shash("sha384", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + + case OTX2_CPT_SHA512: + ctx->hashalg = crypto_alloc_shash("sha512", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(ctx->hashalg)) + return PTR_ERR(ctx->hashalg); + break; + } + } + switch (ctx->cipher_type) { + case OTX2_CPT_AES_CBC: + case OTX2_CPT_AES_ECB: + ctx->enc_align_len = 16; + break; + case OTX2_CPT_DES3_CBC: + case OTX2_CPT_DES3_ECB: + ctx->enc_align_len = 8; + break; + case OTX2_CPT_AES_GCM: + case OTX2_CPT_CIPHER_NULL: + ctx->enc_align_len = 1; + break; + } + crypto_aead_set_reqsize_dma(atfm, sizeof(struct otx2_cpt_req_ctx)); + + return cpt_aead_fallback_init(ctx, alg); +} + +static int otx2_cpt_aead_cbc_aes_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_CBC, OTX2_CPT_SHA1); +} + +static int otx2_cpt_aead_cbc_aes_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_CBC, OTX2_CPT_SHA256); +} + +static int otx2_cpt_aead_cbc_aes_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_CBC, OTX2_CPT_SHA384); +} + +static int otx2_cpt_aead_cbc_aes_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_CBC, OTX2_CPT_SHA512); +} + +static int otx2_cpt_aead_ecb_null_sha1_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_CIPHER_NULL, OTX2_CPT_SHA1); +} + +static int otx2_cpt_aead_ecb_null_sha256_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_CIPHER_NULL, OTX2_CPT_SHA256); +} + +static int otx2_cpt_aead_ecb_null_sha384_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_CIPHER_NULL, OTX2_CPT_SHA384); +} + +static int otx2_cpt_aead_ecb_null_sha512_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_CIPHER_NULL, OTX2_CPT_SHA512); +} + +static int otx2_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) +{ + return cpt_aead_init(tfm, OTX2_CPT_AES_GCM, OTX2_CPT_MAC_NULL); +} + +static void otx2_cpt_aead_exit(struct crypto_aead *tfm) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + + kfree(ctx->ipad); + kfree(ctx->opad); + if (ctx->hashalg) + crypto_free_shash(ctx->hashalg); + kfree(ctx->sdesc); + + if (ctx->fbk_cipher) { + crypto_free_aead(ctx->fbk_cipher); + ctx->fbk_cipher = NULL; + } +} + +static int otx2_cpt_aead_gcm_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + + if (crypto_rfc4106_check_authsize(authsize)) + return -EINVAL; + + tfm->authsize = authsize; + /* Set authsize for fallback case */ + if (ctx->fbk_cipher) + ctx->fbk_cipher->authsize = authsize; + + return 0; +} + +static int otx2_cpt_aead_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + tfm->authsize = authsize; + + return 0; +} + +static int otx2_cpt_aead_null_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + + ctx->is_trunc_hmac = true; + tfm->authsize = authsize; + + return 0; +} + +static struct otx2_cpt_sdesc *alloc_sdesc(struct crypto_shash *alg) +{ + struct otx2_cpt_sdesc *sdesc; + int size; + + size = sizeof(struct shash_desc) + crypto_shash_descsize(alg); + sdesc = kmalloc(size, GFP_KERNEL); + if (!sdesc) + return NULL; + + sdesc->shash.tfm = alg; + + return sdesc; +} + +static inline void swap_data32(void *buf, u32 len) +{ + cpu_to_be32_array(buf, buf, len / 4); +} + +static inline void swap_data64(void *buf, u32 len) +{ + u64 *src = buf; + int i = 0; + + for (i = 0 ; i < len / 8; i++, src++) + cpu_to_be64s(src); +} + +static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) +{ + struct sha512_state *sha512; + struct sha256_state *sha256; + struct sha1_state *sha1; + + switch (mac_type) { + case OTX2_CPT_SHA1: + sha1 = (struct sha1_state *) in_pad; + swap_data32(sha1->state, SHA1_DIGEST_SIZE); + memcpy(out_pad, &sha1->state, SHA1_DIGEST_SIZE); + break; + + case OTX2_CPT_SHA256: + sha256 = (struct sha256_state *) in_pad; + swap_data32(sha256->state, SHA256_DIGEST_SIZE); + memcpy(out_pad, &sha256->state, SHA256_DIGEST_SIZE); + break; + + case OTX2_CPT_SHA384: + case OTX2_CPT_SHA512: + sha512 = (struct sha512_state *) in_pad; + swap_data64(sha512->state, SHA512_DIGEST_SIZE); + memcpy(out_pad, &sha512->state, SHA512_DIGEST_SIZE); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static int aead_hmac_init(struct crypto_aead *cipher) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); + int state_size = crypto_shash_statesize(ctx->hashalg); + int ds = crypto_shash_digestsize(ctx->hashalg); + int bs = crypto_shash_blocksize(ctx->hashalg); + int authkeylen = ctx->auth_key_len; + u8 *ipad = NULL, *opad = NULL; + int ret = 0, icount = 0; + + ctx->sdesc = alloc_sdesc(ctx->hashalg); + if (!ctx->sdesc) + return -ENOMEM; + + ctx->ipad = kzalloc(bs, GFP_KERNEL); + if (!ctx->ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + ctx->opad = kzalloc(bs, GFP_KERNEL); + if (!ctx->opad) { + ret = -ENOMEM; + goto calc_fail; + } + + ipad = kzalloc(state_size, GFP_KERNEL); + if (!ipad) { + ret = -ENOMEM; + goto calc_fail; + } + + opad = kzalloc(state_size, GFP_KERNEL); + if (!opad) { + ret = -ENOMEM; + goto calc_fail; + } + + if (authkeylen > bs) { + ret = crypto_shash_digest(&ctx->sdesc->shash, ctx->key, + authkeylen, ipad); + if (ret) + goto calc_fail; + + authkeylen = ds; + } else { + memcpy(ipad, ctx->key, authkeylen); + } + + memset(ipad + authkeylen, 0, bs - authkeylen); + memcpy(opad, ipad, bs); + + for (icount = 0; icount < bs; icount++) { + ipad[icount] ^= 0x36; + opad[icount] ^= 0x5c; + } + + /* + * Partial Hash calculated from the software + * algorithm is retrieved for IPAD & OPAD + */ + + /* IPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, ipad, bs); + crypto_shash_export(&ctx->sdesc->shash, ipad); + ret = copy_pad(ctx->mac_type, ctx->ipad, ipad); + if (ret) + goto calc_fail; + + /* OPAD Calculation */ + crypto_shash_init(&ctx->sdesc->shash); + crypto_shash_update(&ctx->sdesc->shash, opad, bs); + crypto_shash_export(&ctx->sdesc->shash, opad); + ret = copy_pad(ctx->mac_type, ctx->opad, opad); + if (ret) + goto calc_fail; + + kfree(ipad); + kfree(opad); + + return 0; + +calc_fail: + kfree(ctx->ipad); + ctx->ipad = NULL; + kfree(ctx->opad); + ctx->opad = NULL; + kfree(ipad); + kfree(opad); + kfree(ctx->sdesc); + ctx->sdesc = NULL; + + return ret; +} + +static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); + struct crypto_authenc_key_param *param; + int enckeylen = 0, authkeylen = 0; + struct rtattr *rta = (void *)key; + + if (!RTA_OK(rta, keylen)) + return -EINVAL; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + return -EINVAL; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + return -EINVAL; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (keylen < enckeylen) + return -EINVAL; + + if (keylen > OTX2_CPT_MAX_KEY_SIZE) + return -EINVAL; + + authkeylen = keylen - enckeylen; + memcpy(ctx->key, key, keylen); + + switch (enckeylen) { + case AES_KEYSIZE_128: + ctx->key_type = OTX2_CPT_AES_128_BIT; + break; + case AES_KEYSIZE_192: + ctx->key_type = OTX2_CPT_AES_192_BIT; + break; + case AES_KEYSIZE_256: + ctx->key_type = OTX2_CPT_AES_256_BIT; + break; + default: + /* Invalid key length */ + return -EINVAL; + } + + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = authkeylen; + + return aead_hmac_init(cipher); +} + +static int otx2_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); + struct crypto_authenc_key_param *param; + struct rtattr *rta = (void *)key; + int enckeylen = 0; + + if (!RTA_OK(rta, keylen)) + return -EINVAL; + + if (rta->rta_type != CRYPTO_AUTHENC_KEYA_PARAM) + return -EINVAL; + + if (RTA_PAYLOAD(rta) < sizeof(*param)) + return -EINVAL; + + param = RTA_DATA(rta); + enckeylen = be32_to_cpu(param->enckeylen); + key += RTA_ALIGN(rta->rta_len); + keylen -= RTA_ALIGN(rta->rta_len); + if (enckeylen != 0) + return -EINVAL; + + if (keylen > OTX2_CPT_MAX_KEY_SIZE) + return -EINVAL; + + memcpy(ctx->key, key, keylen); + ctx->enc_key_len = enckeylen; + ctx->auth_key_len = keylen; + + return 0; +} + +static int otx2_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, + const unsigned char *key, + unsigned int keylen) +{ + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); + + /* + * For aes gcm we expect to get encryption key (16, 24, 32 bytes) + * and salt (4 bytes) + */ + switch (keylen) { + case AES_KEYSIZE_128 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX2_CPT_AES_128_BIT; + ctx->enc_key_len = AES_KEYSIZE_128; + break; + case AES_KEYSIZE_192 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX2_CPT_AES_192_BIT; + ctx->enc_key_len = AES_KEYSIZE_192; + break; + case AES_KEYSIZE_256 + AES_GCM_SALT_SIZE: + ctx->key_type = OTX2_CPT_AES_256_BIT; + ctx->enc_key_len = AES_KEYSIZE_256; + break; + default: + /* Invalid key and salt length */ + return -EINVAL; + } + + /* Store encryption key and salt */ + memcpy(ctx->key, key, keylen); + + return crypto_aead_setkey(ctx->fbk_cipher, key, keylen); +} + +static inline int create_aead_ctx_hdr(struct aead_request *req, u32 enc, + u32 *argcnt) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + struct otx2_cpt_fc_ctx *fctx = &rctx->fctx; + int mac_len = crypto_aead_authsize(tfm); + int ds; + + rctx->ctrl_word.e.enc_data_offset = req->assoclen; + + switch (ctx->cipher_type) { + case OTX2_CPT_AES_CBC: + if (req->assoclen > 248 || !IS_ALIGNED(req->assoclen, 8)) + return -EINVAL; + + fctx->enc.enc_ctrl.e.iv_source = OTX2_CPT_FROM_CPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key + ctx->auth_key_len, + ctx->enc_key_len); + /* Copy IV to context */ + memcpy(fctx->enc.encr_iv, req->iv, crypto_aead_ivsize(tfm)); + + ds = crypto_shash_digestsize(ctx->hashalg); + if (ctx->mac_type == OTX2_CPT_SHA384) + ds = SHA512_DIGEST_SIZE; + if (ctx->ipad) + memcpy(fctx->hmac.e.ipad, ctx->ipad, ds); + if (ctx->opad) + memcpy(fctx->hmac.e.opad, ctx->opad, ds); + break; + + case OTX2_CPT_AES_GCM: + if (crypto_ipsec_check_assoclen(req->assoclen)) + return -EINVAL; + + fctx->enc.enc_ctrl.e.iv_source = OTX2_CPT_FROM_DPTR; + /* Copy encryption key to context */ + memcpy(fctx->enc.encr_key, ctx->key, ctx->enc_key_len); + /* Copy salt to context */ + memcpy(fctx->enc.encr_iv, ctx->key + ctx->enc_key_len, + AES_GCM_SALT_SIZE); + + rctx->ctrl_word.e.iv_offset = req->assoclen - AES_GCM_IV_OFFSET; + break; + + default: + /* Unknown cipher type */ + return -EINVAL; + } + cpu_to_be64s(&rctx->ctrl_word.flags); + + req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG; + req_info->ctrl.s.se_req = 1; + req_info->req.opcode.s.major = OTX2_CPT_MAJOR_OP_FC | + DMA_MODE_FLAG(OTX2_CPT_DMA_MODE_SG); + if (enc) { + req_info->req.opcode.s.minor = 2; + req_info->req.param1 = req->cryptlen; + req_info->req.param2 = req->cryptlen + req->assoclen; + } else { + req_info->req.opcode.s.minor = 3; + req_info->req.param1 = req->cryptlen - mac_len; + req_info->req.param2 = req->cryptlen + req->assoclen - mac_len; + } + + fctx->enc.enc_ctrl.e.enc_cipher = ctx->cipher_type; + fctx->enc.enc_ctrl.e.aes_key = ctx->key_type; + fctx->enc.enc_ctrl.e.mac_type = ctx->mac_type; + fctx->enc.enc_ctrl.e.mac_len = mac_len; + cpu_to_be64s(&fctx->enc.enc_ctrl.u); + + /* + * Storing Packet Data Information in offset + * Control Word First 8 bytes + */ + req_info->in[*argcnt].vptr = (u8 *)&rctx->ctrl_word; + req_info->in[*argcnt].size = CONTROL_WORD_LEN; + req_info->req.dlen += CONTROL_WORD_LEN; + ++(*argcnt); + + req_info->in[*argcnt].vptr = (u8 *)fctx; + req_info->in[*argcnt].size = sizeof(struct otx2_cpt_fc_ctx); + req_info->req.dlen += sizeof(struct otx2_cpt_fc_ctx); + ++(*argcnt); + + return 0; +} + +static inline void create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, + u32 enc) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + + req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG; + req_info->ctrl.s.se_req = 1; + req_info->req.opcode.s.major = OTX2_CPT_MAJOR_OP_HMAC | + DMA_MODE_FLAG(OTX2_CPT_DMA_MODE_SG); + req_info->is_trunc_hmac = ctx->is_trunc_hmac; + + req_info->req.opcode.s.minor = 0; + req_info->req.param1 = ctx->auth_key_len; + req_info->req.param2 = ctx->mac_type << 8; + + /* Add authentication key */ + req_info->in[*argcnt].vptr = ctx->key; + req_info->in[*argcnt].size = round_up(ctx->auth_key_len, 8); + req_info->req.dlen += round_up(ctx->auth_key_len, 8); + ++(*argcnt); +} + +static inline int create_aead_input_list(struct aead_request *req, u32 enc) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen = req->cryptlen + req->assoclen; + u32 status, argcnt = 0; + + status = create_aead_ctx_hdr(req, enc, &argcnt); + if (status) + return status; + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->in_cnt = argcnt; + + return 0; +} + +static inline void create_aead_output_list(struct aead_request *req, u32 enc, + u32 mac_len) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 argcnt = 0, outputlen = 0; + + if (enc) + outputlen = req->cryptlen + req->assoclen + mac_len; + else + outputlen = req->cryptlen + req->assoclen - mac_len; + + update_output_data(req_info, req->dst, 0, outputlen, &argcnt); + req_info->out_cnt = argcnt; +} + +static inline void create_aead_null_input_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + u32 inputlen, argcnt = 0; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + create_hmac_ctx_hdr(req, &argcnt, enc); + update_input_data(req_info, req->src, inputlen, &argcnt); + req_info->in_cnt = argcnt; +} + +static inline int create_aead_null_output_list(struct aead_request *req, + u32 enc, u32 mac_len) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + struct scatterlist *dst; + u8 *ptr = NULL; + int argcnt = 0, status, offset; + u32 inputlen; + + if (enc) + inputlen = req->cryptlen + req->assoclen; + else + inputlen = req->cryptlen + req->assoclen - mac_len; + + /* + * If source and destination are different + * then copy payload to destination + */ + if (req->src != req->dst) { + + ptr = kmalloc(inputlen, (req_info->areq->flags & + CRYPTO_TFM_REQ_MAY_SLEEP) ? + GFP_KERNEL : GFP_ATOMIC); + if (!ptr) + return -ENOMEM; + + status = sg_copy_to_buffer(req->src, sg_nents(req->src), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error_free; + } + status = sg_copy_from_buffer(req->dst, sg_nents(req->dst), ptr, + inputlen); + if (status != inputlen) { + status = -EINVAL; + goto error_free; + } + kfree(ptr); + } + + if (enc) { + /* + * In an encryption scenario hmac needs + * to be appended after payload + */ + dst = req->dst; + offset = inputlen; + while (offset >= dst->length) { + offset -= dst->length; + dst = sg_next(dst); + if (!dst) + return -ENOENT; + } + + update_output_data(req_info, dst, offset, mac_len, &argcnt); + } else { + /* + * In a decryption scenario calculated hmac for received + * payload needs to be compare with hmac received + */ + status = sg_copy_buffer(req->src, sg_nents(req->src), + rctx->fctx.hmac.s.hmac_recv, mac_len, + inputlen, true); + if (status != mac_len) + return -EINVAL; + + req_info->out[argcnt].vptr = rctx->fctx.hmac.s.hmac_calc; + req_info->out[argcnt].size = mac_len; + argcnt++; + } + + req_info->out_cnt = argcnt; + return 0; + +error_free: + kfree(ptr); + return status; +} + +static int aead_do_fallback(struct aead_request *req, bool is_enc) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(aead); + int ret; + + if (ctx->fbk_cipher) { + /* Store the cipher tfm and then use the fallback tfm */ + aead_request_set_tfm(&rctx->fbk_req, ctx->fbk_cipher); + aead_request_set_callback(&rctx->fbk_req, req->base.flags, + req->base.complete, req->base.data); + aead_request_set_crypt(&rctx->fbk_req, req->src, + req->dst, req->cryptlen, req->iv); + aead_request_set_ad(&rctx->fbk_req, req->assoclen); + ret = is_enc ? crypto_aead_encrypt(&rctx->fbk_req) : + crypto_aead_decrypt(&rctx->fbk_req); + } else { + ret = -EINVAL; + } + + return ret; +} + +static int cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) +{ + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); + struct otx2_cpt_req_info *req_info = &rctx->cpt_req; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); + struct pci_dev *pdev; + int status, cpu_num; + + /* Clear control words */ + rctx->ctrl_word.flags = 0; + rctx->fctx.enc.enc_ctrl.u = 0; + + req_info->callback = otx2_cpt_aead_callback; + req_info->areq = &req->base; + req_info->req_type = reg_type; + req_info->is_enc = enc; + req_info->is_trunc_hmac = false; + + switch (reg_type) { + case OTX2_CPT_AEAD_ENC_DEC_REQ: + status = create_aead_input_list(req, enc); + if (status) + return status; + create_aead_output_list(req, enc, crypto_aead_authsize(tfm)); + break; + + case OTX2_CPT_AEAD_ENC_DEC_NULL_REQ: + create_aead_null_input_list(req, enc, + crypto_aead_authsize(tfm)); + status = create_aead_null_output_list(req, enc, + crypto_aead_authsize(tfm)); + if (status) + return status; + break; + + default: + return -EINVAL; + } + if (!IS_ALIGNED(req_info->req.param1, ctx->enc_align_len)) + return -EINVAL; + + if (!req_info->req.param2 || + (req_info->req.param1 > OTX2_CPT_MAX_REQ_SIZE) || + (req_info->req.param2 > OTX2_CPT_MAX_REQ_SIZE)) + return aead_do_fallback(req, enc); + + status = get_se_device(&pdev, &cpu_num); + if (status) + return status; + + req_info->ctrl.s.grp = otx2_cpt_get_kcrypto_eng_grp_num(pdev); + + /* + * We perform an asynchronous send and once + * the request is completed the driver would + * intimate through registered call back functions + */ + return otx2_cpt_do_request(pdev, req_info, cpu_num); +} + +static int otx2_cpt_aead_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX2_CPT_AEAD_ENC_DEC_REQ, true); +} + +static int otx2_cpt_aead_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX2_CPT_AEAD_ENC_DEC_REQ, false); +} + +static int otx2_cpt_aead_null_encrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX2_CPT_AEAD_ENC_DEC_NULL_REQ, true); +} + +static int otx2_cpt_aead_null_decrypt(struct aead_request *req) +{ + return cpt_aead_enc_dec(req, OTX2_CPT_AEAD_ENC_DEC_NULL_REQ, false); +} + +static struct skcipher_alg otx2_cpt_skciphers[] = { { + .base.cra_name = "xts(aes)", + .base.cra_driver_name = "cpt_xts_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = otx2_cpt_skcipher_xts_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(aes)", + .base.cra_driver_name = "cpt_cbc_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .ivsize = AES_BLOCK_SIZE, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx2_cpt_skcipher_cbc_aes_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(aes)", + .base.cra_driver_name = "cpt_ecb_aes", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .ivsize = 0, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .setkey = otx2_cpt_skcipher_ecb_aes_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +}, { + .base.cra_name = "cbc(des3_ede)", + .base.cra_driver_name = "cpt_cbc_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = otx2_cpt_skcipher_cbc_des3_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +}, { + .base.cra_name = "ecb(des3_ede)", + .base.cra_driver_name = "cpt_ecb_des3_ede", + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct otx2_cpt_enc_ctx), + .base.cra_alignmask = 7, + .base.cra_priority = 4001, + .base.cra_module = THIS_MODULE, + + .init = otx2_cpt_enc_dec_init, + .exit = otx2_cpt_skcipher_exit, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .ivsize = 0, + .setkey = otx2_cpt_skcipher_ecb_des3_setkey, + .encrypt = otx2_cpt_skcipher_encrypt, + .decrypt = otx2_cpt_skcipher_decrypt, +} }; + +static struct aead_alg otx2_cpt_aeads[] = { { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha1_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_cbc_aes_sha1_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx2_cpt_aead_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha256_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_cbc_aes_sha256_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx2_cpt_aead_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha384_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_cbc_aes_sha384_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx2_cpt_aead_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),cbc(aes))", + .cra_driver_name = "cpt_hmac_sha512_cbc_aes", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_cbc_aes_sha512_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_cbc_aes_sha_setkey, + .setauthsize = otx2_cpt_aead_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha1),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha1_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_ecb_null_sha1_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx2_cpt_aead_null_set_authsize, + .encrypt = otx2_cpt_aead_null_encrypt, + .decrypt = otx2_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA1_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha256),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha256_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_ecb_null_sha256_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx2_cpt_aead_null_set_authsize, + .encrypt = otx2_cpt_aead_null_encrypt, + .decrypt = otx2_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA256_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha384),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha384_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_ecb_null_sha384_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx2_cpt_aead_null_set_authsize, + .encrypt = otx2_cpt_aead_null_encrypt, + .decrypt = otx2_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA384_DIGEST_SIZE, +}, { + .base = { + .cra_name = "authenc(hmac(sha512),ecb(cipher_null))", + .cra_driver_name = "cpt_hmac_sha512_ecb_null", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_ecb_null_sha512_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_ecb_null_sha_setkey, + .setauthsize = otx2_cpt_aead_null_set_authsize, + .encrypt = otx2_cpt_aead_null_encrypt, + .decrypt = otx2_cpt_aead_null_decrypt, + .ivsize = 0, + .maxauthsize = SHA512_DIGEST_SIZE, +}, { + .base = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "cpt_rfc4106_gcm_aes", + .cra_blocksize = 1, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, + .cra_priority = 4001, + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + }, + .init = otx2_cpt_aead_gcm_aes_init, + .exit = otx2_cpt_aead_exit, + .setkey = otx2_cpt_aead_gcm_aes_setkey, + .setauthsize = otx2_cpt_aead_gcm_set_authsize, + .encrypt = otx2_cpt_aead_encrypt, + .decrypt = otx2_cpt_aead_decrypt, + .ivsize = AES_GCM_IV_SIZE, + .maxauthsize = AES_GCM_ICV_SIZE, +} }; + +static inline int cpt_register_algs(void) +{ + int i, err = 0; + + for (i = 0; i < ARRAY_SIZE(otx2_cpt_skciphers); i++) + otx2_cpt_skciphers[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + if (err) + return err; + + for (i = 0; i < ARRAY_SIZE(otx2_cpt_aeads); i++) + otx2_cpt_aeads[i].base.cra_flags &= ~CRYPTO_ALG_DEAD; + + err = crypto_register_aeads(otx2_cpt_aeads, + ARRAY_SIZE(otx2_cpt_aeads)); + if (err) { + crypto_unregister_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + return err; + } + + return 0; +} + +static inline void cpt_unregister_algs(void) +{ + crypto_unregister_skciphers(otx2_cpt_skciphers, + ARRAY_SIZE(otx2_cpt_skciphers)); + crypto_unregister_aeads(otx2_cpt_aeads, ARRAY_SIZE(otx2_cpt_aeads)); +} + +static int compare_func(const void *lptr, const void *rptr) +{ + const struct cpt_device_desc *ldesc = (struct cpt_device_desc *) lptr; + const struct cpt_device_desc *rdesc = (struct cpt_device_desc *) rptr; + + if (ldesc->dev->devfn < rdesc->dev->devfn) + return -1; + if (ldesc->dev->devfn > rdesc->dev->devfn) + return 1; + return 0; +} + +static void swap_func(void *lptr, void *rptr, int size) +{ + struct cpt_device_desc *ldesc = lptr; + struct cpt_device_desc *rdesc = rptr; + + swap(*ldesc, *rdesc); +} + +int otx2_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + int num_queues, int num_devices) +{ + int ret = 0; + int count; + + mutex_lock(&mutex); + count = atomic_read(&se_devices.count); + if (count >= OTX2_CPT_MAX_LFS_NUM) { + dev_err(&pdev->dev, "No space to add a new device\n"); + ret = -ENOSPC; + goto unlock; + } + se_devices.desc[count].num_queues = num_queues; + se_devices.desc[count++].dev = pdev; + atomic_inc(&se_devices.count); + + if (atomic_read(&se_devices.count) == num_devices && + is_crypto_registered == false) { + if (cpt_register_algs()) { + dev_err(&pdev->dev, + "Error in registering crypto algorithms\n"); + ret = -EINVAL; + goto unlock; + } + try_module_get(mod); + is_crypto_registered = true; + } + sort(se_devices.desc, count, sizeof(struct cpt_device_desc), + compare_func, swap_func); + +unlock: + mutex_unlock(&mutex); + return ret; +} + +void otx2_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod) +{ + struct cpt_device_table *dev_tbl; + bool dev_found = false; + int i, j, count; + + mutex_lock(&mutex); + + dev_tbl = &se_devices; + count = atomic_read(&dev_tbl->count); + for (i = 0; i < count; i++) { + if (pdev == dev_tbl->desc[i].dev) { + for (j = i; j < count-1; j++) + dev_tbl->desc[j] = dev_tbl->desc[j+1]; + dev_found = true; + break; + } + } + + if (!dev_found) { + dev_err(&pdev->dev, "%s device not found\n", __func__); + goto unlock; + } + if (atomic_dec_and_test(&se_devices.count)) { + cpt_unregister_algs(); + module_put(mod); + is_crypto_registered = false; + } + +unlock: + mutex_unlock(&mutex); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.h b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.h new file mode 100644 index 0000000000..f04184bd17 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.h @@ -0,0 +1,178 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * Copyright (C) 2020 Marvell. + */ + +#ifndef __OTX2_CPT_ALGS_H +#define __OTX2_CPT_ALGS_H + +#include <crypto/hash.h> +#include <crypto/skcipher.h> +#include <crypto/aead.h> +#include "otx2_cpt_common.h" + +#define OTX2_CPT_MAX_ENC_KEY_SIZE 32 +#define OTX2_CPT_MAX_HASH_KEY_SIZE 64 +#define OTX2_CPT_MAX_KEY_SIZE (OTX2_CPT_MAX_ENC_KEY_SIZE + \ + OTX2_CPT_MAX_HASH_KEY_SIZE) +enum otx2_cpt_request_type { + OTX2_CPT_ENC_DEC_REQ = 0x1, + OTX2_CPT_AEAD_ENC_DEC_REQ = 0x2, + OTX2_CPT_AEAD_ENC_DEC_NULL_REQ = 0x3, + OTX2_CPT_PASSTHROUGH_REQ = 0x4 +}; + +enum otx2_cpt_major_opcodes { + OTX2_CPT_MAJOR_OP_MISC = 0x01, + OTX2_CPT_MAJOR_OP_FC = 0x33, + OTX2_CPT_MAJOR_OP_HMAC = 0x35, +}; + +enum otx2_cpt_cipher_type { + OTX2_CPT_CIPHER_NULL = 0x0, + OTX2_CPT_DES3_CBC = 0x1, + OTX2_CPT_DES3_ECB = 0x2, + OTX2_CPT_AES_CBC = 0x3, + OTX2_CPT_AES_ECB = 0x4, + OTX2_CPT_AES_CFB = 0x5, + OTX2_CPT_AES_CTR = 0x6, + OTX2_CPT_AES_GCM = 0x7, + OTX2_CPT_AES_XTS = 0x8 +}; + +enum otx2_cpt_mac_type { + OTX2_CPT_MAC_NULL = 0x0, + OTX2_CPT_MD5 = 0x1, + OTX2_CPT_SHA1 = 0x2, + OTX2_CPT_SHA224 = 0x3, + OTX2_CPT_SHA256 = 0x4, + OTX2_CPT_SHA384 = 0x5, + OTX2_CPT_SHA512 = 0x6, + OTX2_CPT_GMAC = 0x7 +}; + +enum otx2_cpt_aes_key_len { + OTX2_CPT_AES_128_BIT = 0x1, + OTX2_CPT_AES_192_BIT = 0x2, + OTX2_CPT_AES_256_BIT = 0x3 +}; + +union otx2_cpt_encr_ctrl { + u64 u; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 enc_cipher:4; + u64 reserved_59:1; + u64 aes_key:2; + u64 iv_source:1; + u64 mac_type:4; + u64 reserved_49_51:3; + u64 auth_input_type:1; + u64 mac_len:8; + u64 reserved_32_39:8; + u64 encr_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 encr_offset:16; + u64 reserved_32_39:8; + u64 mac_len:8; + u64 auth_input_type:1; + u64 reserved_49_51:3; + u64 mac_type:4; + u64 iv_source:1; + u64 aes_key:2; + u64 reserved_59:1; + u64 enc_cipher:4; +#endif + } e; +}; + +struct otx2_cpt_cipher { + const char *name; + u8 value; +}; + +struct otx2_cpt_fc_enc_ctx { + union otx2_cpt_encr_ctrl enc_ctrl; + u8 encr_key[32]; + u8 encr_iv[16]; +}; + +union otx2_cpt_fc_hmac_ctx { + struct { + u8 ipad[64]; + u8 opad[64]; + } e; + struct { + u8 hmac_calc[64]; /* HMAC calculated */ + u8 hmac_recv[64]; /* HMAC received */ + } s; +}; + +struct otx2_cpt_fc_ctx { + struct otx2_cpt_fc_enc_ctx enc; + union otx2_cpt_fc_hmac_ctx hmac; +}; + +struct otx2_cpt_enc_ctx { + u32 key_len; + u8 enc_key[OTX2_CPT_MAX_KEY_SIZE]; + u8 cipher_type; + u8 key_type; + u8 enc_align_len; + struct crypto_skcipher *fbk_cipher; +}; + +union otx2_cpt_offset_ctrl { + u64 flags; + struct { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved:32; + u64 enc_data_offset:16; + u64 iv_offset:8; + u64 auth_offset:8; +#else + u64 auth_offset:8; + u64 iv_offset:8; + u64 enc_data_offset:16; + u64 reserved:32; +#endif + } e; +}; + +struct otx2_cpt_req_ctx { + struct otx2_cpt_req_info cpt_req; + union otx2_cpt_offset_ctrl ctrl_word; + struct otx2_cpt_fc_ctx fctx; + union { + struct skcipher_request sk_fbk_req; + struct aead_request fbk_req; + }; +}; + +struct otx2_cpt_sdesc { + struct shash_desc shash; +}; + +struct otx2_cpt_aead_ctx { + u8 key[OTX2_CPT_MAX_KEY_SIZE]; + struct crypto_shash *hashalg; + struct otx2_cpt_sdesc *sdesc; + struct crypto_aead *fbk_cipher; + u8 *ipad; + u8 *opad; + u32 enc_key_len; + u32 auth_key_len; + u8 cipher_type; + u8 mac_type; + u8 key_type; + u8 is_trunc_hmac; + u8 enc_align_len; +}; +int otx2_cpt_crypto_init(struct pci_dev *pdev, struct module *mod, + int num_queues, int num_devices); +void otx2_cpt_crypto_exit(struct pci_dev *pdev, struct module *mod); + +#endif /* __OTX2_CPT_ALGS_H */ diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c new file mode 100644 index 0000000000..bac729c885 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptvf.h" +#include "otx2_cptlf.h" +#include "otx2_cptvf_algs.h" +#include "cn10k_cpt.h" +#include <rvu_reg.h> + +#define OTX2_CPTVF_DRV_NAME "rvu_cptvf" + +static void cptvf_enable_pfvf_mbox_intrs(struct otx2_cptvf_dev *cptvf) +{ + /* Clear interrupt if any */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, OTX2_RVU_VF_INT, + 0x1ULL); + + /* Enable PF-VF interrupt */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, + OTX2_RVU_VF_INT_ENA_W1S, 0x1ULL); +} + +static void cptvf_disable_pfvf_mbox_intrs(struct otx2_cptvf_dev *cptvf) +{ + /* Disable PF-VF interrupt */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, + OTX2_RVU_VF_INT_ENA_W1C, 0x1ULL); + + /* Clear interrupt if any */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, OTX2_RVU_VF_INT, + 0x1ULL); +} + +static int cptvf_register_interrupts(struct otx2_cptvf_dev *cptvf) +{ + int ret, irq; + int num_vec; + + num_vec = pci_msix_vec_count(cptvf->pdev); + if (num_vec <= 0) + return -EINVAL; + + /* Enable MSI-X */ + ret = pci_alloc_irq_vectors(cptvf->pdev, num_vec, num_vec, + PCI_IRQ_MSIX); + if (ret < 0) { + dev_err(&cptvf->pdev->dev, + "Request for %d msix vectors failed\n", num_vec); + return ret; + } + irq = pci_irq_vector(cptvf->pdev, OTX2_CPT_VF_INT_VEC_E_MBOX); + /* Register VF<=>PF mailbox interrupt handler */ + ret = devm_request_irq(&cptvf->pdev->dev, irq, + otx2_cptvf_pfvf_mbox_intr, 0, + "CPTPFVF Mbox", cptvf); + if (ret) + return ret; + /* Enable PF-VF mailbox interrupts */ + cptvf_enable_pfvf_mbox_intrs(cptvf); + + ret = otx2_cpt_send_ready_msg(&cptvf->pfvf_mbox, cptvf->pdev); + if (ret) { + dev_warn(&cptvf->pdev->dev, + "PF not responding to mailbox, deferring probe\n"); + cptvf_disable_pfvf_mbox_intrs(cptvf); + return -EPROBE_DEFER; + } + return 0; +} + +static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf) +{ + struct pci_dev *pdev = cptvf->pdev; + resource_size_t offset, size; + int ret; + + cptvf->pfvf_mbox_wq = + alloc_ordered_workqueue("cpt_pfvf_mailbox", + WQ_HIGHPRI | WQ_MEM_RECLAIM); + if (!cptvf->pfvf_mbox_wq) + return -ENOMEM; + + if (test_bit(CN10K_MBOX, &cptvf->cap_flag)) { + /* For cn10k platform, VF mailbox region is in its BAR2 + * register space + */ + cptvf->pfvf_mbox_base = cptvf->reg_base + + CN10K_CPT_VF_MBOX_REGION; + } else { + offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM); + size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM); + /* Map PF-VF mailbox memory */ + cptvf->pfvf_mbox_base = devm_ioremap_wc(&pdev->dev, offset, + size); + if (!cptvf->pfvf_mbox_base) { + dev_err(&pdev->dev, "Unable to map BAR4\n"); + ret = -ENOMEM; + goto free_wqe; + } + } + + ret = otx2_mbox_init(&cptvf->pfvf_mbox, cptvf->pfvf_mbox_base, + pdev, cptvf->reg_base, MBOX_DIR_VFPF, 1); + if (ret) + goto free_wqe; + + ret = otx2_cpt_mbox_bbuf_init(cptvf, pdev); + if (ret) + goto destroy_mbox; + + INIT_WORK(&cptvf->pfvf_mbox_work, otx2_cptvf_pfvf_mbox_handler); + return 0; + +destroy_mbox: + otx2_mbox_destroy(&cptvf->pfvf_mbox); +free_wqe: + destroy_workqueue(cptvf->pfvf_mbox_wq); + return ret; +} + +static void cptvf_pfvf_mbox_destroy(struct otx2_cptvf_dev *cptvf) +{ + destroy_workqueue(cptvf->pfvf_mbox_wq); + otx2_mbox_destroy(&cptvf->pfvf_mbox); +} + +static void cptlf_work_handler(unsigned long data) +{ + otx2_cpt_post_process((struct otx2_cptlf_wqe *) data); +} + +static void cleanup_tasklet_work(struct otx2_cptlfs_info *lfs) +{ + int i; + + for (i = 0; i < lfs->lfs_num; i++) { + if (!lfs->lf[i].wqe) + continue; + + tasklet_kill(&lfs->lf[i].wqe->work); + kfree(lfs->lf[i].wqe); + lfs->lf[i].wqe = NULL; + } +} + +static int init_tasklet_work(struct otx2_cptlfs_info *lfs) +{ + struct otx2_cptlf_wqe *wqe; + int i, ret = 0; + + for (i = 0; i < lfs->lfs_num; i++) { + wqe = kzalloc(sizeof(struct otx2_cptlf_wqe), GFP_KERNEL); + if (!wqe) { + ret = -ENOMEM; + goto cleanup_tasklet; + } + + tasklet_init(&wqe->work, cptlf_work_handler, (u64) wqe); + wqe->lfs = lfs; + wqe->lf_num = i; + lfs->lf[i].wqe = wqe; + } + return 0; + +cleanup_tasklet: + cleanup_tasklet_work(lfs); + return ret; +} + +static void free_pending_queues(struct otx2_cptlfs_info *lfs) +{ + int i; + + for (i = 0; i < lfs->lfs_num; i++) { + kfree(lfs->lf[i].pqueue.head); + lfs->lf[i].pqueue.head = NULL; + } +} + +static int alloc_pending_queues(struct otx2_cptlfs_info *lfs) +{ + int size, ret, i; + + if (!lfs->lfs_num) + return -EINVAL; + + for (i = 0; i < lfs->lfs_num; i++) { + lfs->lf[i].pqueue.qlen = OTX2_CPT_INST_QLEN_MSGS; + size = lfs->lf[i].pqueue.qlen * + sizeof(struct otx2_cpt_pending_entry); + + lfs->lf[i].pqueue.head = kzalloc(size, GFP_KERNEL); + if (!lfs->lf[i].pqueue.head) { + ret = -ENOMEM; + goto error; + } + + /* Initialize spin lock */ + spin_lock_init(&lfs->lf[i].pqueue.lock); + } + return 0; + +error: + free_pending_queues(lfs); + return ret; +} + +static void lf_sw_cleanup(struct otx2_cptlfs_info *lfs) +{ + cleanup_tasklet_work(lfs); + free_pending_queues(lfs); +} + +static int lf_sw_init(struct otx2_cptlfs_info *lfs) +{ + int ret; + + ret = alloc_pending_queues(lfs); + if (ret) { + dev_err(&lfs->pdev->dev, + "Allocating pending queues failed\n"); + return ret; + } + ret = init_tasklet_work(lfs); + if (ret) { + dev_err(&lfs->pdev->dev, + "Tasklet work init failed\n"); + goto pending_queues_free; + } + return 0; + +pending_queues_free: + free_pending_queues(lfs); + return ret; +} + +static void cptvf_lf_shutdown(struct otx2_cptlfs_info *lfs) +{ + atomic_set(&lfs->state, OTX2_CPTLF_IN_RESET); + + /* Remove interrupts affinity */ + otx2_cptlf_free_irqs_affinity(lfs); + /* Disable instruction queue */ + otx2_cptlf_disable_iqueues(lfs); + /* Unregister crypto algorithms */ + otx2_cpt_crypto_exit(lfs->pdev, THIS_MODULE); + /* Unregister LFs interrupts */ + otx2_cptlf_unregister_interrupts(lfs); + /* Cleanup LFs software side */ + lf_sw_cleanup(lfs); + /* Send request to detach LFs */ + otx2_cpt_detach_rsrcs_msg(lfs); +} + +static int cptvf_lf_init(struct otx2_cptvf_dev *cptvf) +{ + struct otx2_cptlfs_info *lfs = &cptvf->lfs; + struct device *dev = &cptvf->pdev->dev; + int ret, lfs_num; + u8 eng_grp_msk; + + /* Get engine group number for symmetric crypto */ + cptvf->lfs.kcrypto_eng_grp_num = OTX2_CPT_INVALID_CRYPTO_ENG_GRP; + ret = otx2_cptvf_send_eng_grp_num_msg(cptvf, OTX2_CPT_SE_TYPES); + if (ret) + return ret; + + if (cptvf->lfs.kcrypto_eng_grp_num == OTX2_CPT_INVALID_CRYPTO_ENG_GRP) { + dev_err(dev, "Engine group for kernel crypto not available\n"); + ret = -ENOENT; + return ret; + } + eng_grp_msk = 1 << cptvf->lfs.kcrypto_eng_grp_num; + + ret = otx2_cptvf_send_kvf_limits_msg(cptvf); + if (ret) + return ret; + + lfs_num = cptvf->lfs.kvf_limits ? cptvf->lfs.kvf_limits : + num_online_cpus(); + + otx2_cptlf_set_dev_info(lfs, cptvf->pdev, cptvf->reg_base, + &cptvf->pfvf_mbox, cptvf->blkaddr); + ret = otx2_cptlf_init(lfs, eng_grp_msk, OTX2_CPT_QUEUE_HI_PRIO, + lfs_num); + if (ret) + return ret; + + /* Get msix offsets for attached LFs */ + ret = otx2_cpt_msix_offset_msg(lfs); + if (ret) + goto cleanup_lf; + + /* Initialize LFs software side */ + ret = lf_sw_init(lfs); + if (ret) + goto cleanup_lf; + + /* Register LFs interrupts */ + ret = otx2_cptlf_register_interrupts(lfs); + if (ret) + goto cleanup_lf_sw; + + /* Set interrupts affinity */ + ret = otx2_cptlf_set_irqs_affinity(lfs); + if (ret) + goto unregister_intr; + + atomic_set(&lfs->state, OTX2_CPTLF_STARTED); + /* Register crypto algorithms */ + ret = otx2_cpt_crypto_init(lfs->pdev, THIS_MODULE, lfs_num, 1); + if (ret) { + dev_err(&lfs->pdev->dev, "algorithms registration failed\n"); + goto disable_irqs; + } + return 0; + +disable_irqs: + otx2_cptlf_free_irqs_affinity(lfs); +unregister_intr: + otx2_cptlf_unregister_interrupts(lfs); +cleanup_lf_sw: + lf_sw_cleanup(lfs); +cleanup_lf: + otx2_cptlf_shutdown(lfs); + + return ret; +} + +static int otx2_cptvf_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct otx2_cptvf_dev *cptvf; + int ret; + + cptvf = devm_kzalloc(dev, sizeof(*cptvf), GFP_KERNEL); + if (!cptvf) + return -ENOMEM; + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "Failed to enable PCI device\n"); + goto clear_drvdata; + } + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto clear_drvdata; + } + /* Map VF's configuration registers */ + ret = pcim_iomap_regions_request_all(pdev, 1 << PCI_PF_REG_BAR_NUM, + OTX2_CPTVF_DRV_NAME); + if (ret) { + dev_err(dev, "Couldn't get PCI resources 0x%x\n", ret); + goto clear_drvdata; + } + pci_set_master(pdev); + pci_set_drvdata(pdev, cptvf); + cptvf->pdev = pdev; + + cptvf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM]; + + otx2_cpt_set_hw_caps(pdev, &cptvf->cap_flag); + + ret = cn10k_cptvf_lmtst_init(cptvf); + if (ret) + goto clear_drvdata; + + /* Initialize PF<=>VF mailbox */ + ret = cptvf_pfvf_mbox_init(cptvf); + if (ret) + goto clear_drvdata; + + /* Register interrupts */ + ret = cptvf_register_interrupts(cptvf); + if (ret) + goto destroy_pfvf_mbox; + + cptvf->blkaddr = BLKADDR_CPT0; + /* Initialize CPT LFs */ + ret = cptvf_lf_init(cptvf); + if (ret) + goto unregister_interrupts; + + return 0; + +unregister_interrupts: + cptvf_disable_pfvf_mbox_intrs(cptvf); +destroy_pfvf_mbox: + cptvf_pfvf_mbox_destroy(cptvf); +clear_drvdata: + pci_set_drvdata(pdev, NULL); + + return ret; +} + +static void otx2_cptvf_remove(struct pci_dev *pdev) +{ + struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); + + if (!cptvf) { + dev_err(&pdev->dev, "Invalid CPT VF device.\n"); + return; + } + cptvf_lf_shutdown(&cptvf->lfs); + /* Disable PF-VF mailbox interrupt */ + cptvf_disable_pfvf_mbox_intrs(cptvf); + /* Destroy PF-VF mbox */ + cptvf_pfvf_mbox_destroy(cptvf); + pci_set_drvdata(pdev, NULL); +} + +/* Supported devices */ +static const struct pci_device_id otx2_cptvf_id_table[] = { + {PCI_VDEVICE(CAVIUM, OTX2_CPT_PCI_VF_DEVICE_ID), 0}, + {PCI_VDEVICE(CAVIUM, CN10K_CPT_PCI_VF_DEVICE_ID), 0}, + { 0, } /* end of table */ +}; + +static struct pci_driver otx2_cptvf_pci_driver = { + .name = OTX2_CPTVF_DRV_NAME, + .id_table = otx2_cptvf_id_table, + .probe = otx2_cptvf_probe, + .remove = otx2_cptvf_remove, +}; + +module_pci_driver(otx2_cptvf_pci_driver); + +MODULE_IMPORT_NS(CRYPTO_DEV_OCTEONTX2_CPT); + +MODULE_AUTHOR("Marvell"); +MODULE_DESCRIPTION("Marvell RVU CPT Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_DEVICE_TABLE(pci, otx2_cptvf_id_table); diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c new file mode 100644 index 0000000000..75c403f2b1 --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cpt_common.h" +#include "otx2_cptvf.h" +#include <rvu_reg.h> + +int otx2_cpt_mbox_bbuf_init(struct otx2_cptvf_dev *cptvf, struct pci_dev *pdev) +{ + struct otx2_mbox_dev *mdev; + struct otx2_mbox *otx2_mbox; + + cptvf->bbuf_base = devm_kmalloc(&pdev->dev, MBOX_SIZE, GFP_KERNEL); + if (!cptvf->bbuf_base) + return -ENOMEM; + /* + * Overwrite mbox mbase to point to bounce buffer, so that PF/VF + * prepare all mbox messages in bounce buffer instead of directly + * in hw mbox memory. + */ + otx2_mbox = &cptvf->pfvf_mbox; + mdev = &otx2_mbox->dev[0]; + mdev->mbase = cptvf->bbuf_base; + + return 0; +} + +static void otx2_cpt_sync_mbox_bbuf(struct otx2_mbox *mbox, int devid) +{ + u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE); + struct otx2_mbox_dev *mdev = &mbox->dev[devid]; + struct mbox_hdr *hdr; + u64 msg_size; + + if (mdev->mbase == hw_mbase) + return; + + hdr = hw_mbase + mbox->rx_start; + msg_size = hdr->msg_size; + + if (msg_size > mbox->rx_size - msgs_offset) + msg_size = mbox->rx_size - msgs_offset; + + /* Copy mbox messages from mbox memory to bounce buffer */ + memcpy(mdev->mbase + mbox->rx_start, + hw_mbase + mbox->rx_start, msg_size + msgs_offset); +} + +irqreturn_t otx2_cptvf_pfvf_mbox_intr(int __always_unused irq, void *arg) +{ + struct otx2_cptvf_dev *cptvf = arg; + u64 intr; + + /* Read the interrupt bits */ + intr = otx2_cpt_read64(cptvf->reg_base, BLKADDR_RVUM, 0, + OTX2_RVU_VF_INT); + + if (intr & 0x1ULL) { + /* Schedule work queue function to process the MBOX request */ + queue_work(cptvf->pfvf_mbox_wq, &cptvf->pfvf_mbox_work); + /* Clear and ack the interrupt */ + otx2_cpt_write64(cptvf->reg_base, BLKADDR_RVUM, 0, + OTX2_RVU_VF_INT, 0x1ULL); + } + return IRQ_HANDLED; +} + +static void process_pfvf_mbox_mbox_msg(struct otx2_cptvf_dev *cptvf, + struct mbox_msghdr *msg) +{ + struct otx2_cptlfs_info *lfs = &cptvf->lfs; + struct otx2_cpt_kvf_limits_rsp *rsp_limits; + struct otx2_cpt_egrp_num_rsp *rsp_grp; + struct cpt_rd_wr_reg_msg *rsp_reg; + struct msix_offset_rsp *rsp_msix; + int i; + + if (msg->id >= MBOX_MSG_MAX) { + dev_err(&cptvf->pdev->dev, + "MBOX msg with unknown ID %d\n", msg->id); + return; + } + if (msg->sig != OTX2_MBOX_RSP_SIG) { + dev_err(&cptvf->pdev->dev, + "MBOX msg with wrong signature %x, ID %d\n", + msg->sig, msg->id); + return; + } + switch (msg->id) { + case MBOX_MSG_READY: + cptvf->vf_id = ((msg->pcifunc >> RVU_PFVF_FUNC_SHIFT) + & RVU_PFVF_FUNC_MASK) - 1; + break; + case MBOX_MSG_ATTACH_RESOURCES: + /* Check if resources were successfully attached */ + if (!msg->rc) + lfs->are_lfs_attached = 1; + break; + case MBOX_MSG_DETACH_RESOURCES: + /* Check if resources were successfully detached */ + if (!msg->rc) + lfs->are_lfs_attached = 0; + break; + case MBOX_MSG_MSIX_OFFSET: + rsp_msix = (struct msix_offset_rsp *) msg; + for (i = 0; i < rsp_msix->cptlfs; i++) + lfs->lf[i].msix_offset = rsp_msix->cptlf_msixoff[i]; + break; + case MBOX_MSG_CPT_RD_WR_REGISTER: + rsp_reg = (struct cpt_rd_wr_reg_msg *) msg; + if (msg->rc) { + dev_err(&cptvf->pdev->dev, + "Reg %llx rd/wr(%d) failed %d\n", + rsp_reg->reg_offset, rsp_reg->is_write, + msg->rc); + return; + } + if (!rsp_reg->is_write) + *rsp_reg->ret_val = rsp_reg->val; + break; + case MBOX_MSG_GET_ENG_GRP_NUM: + rsp_grp = (struct otx2_cpt_egrp_num_rsp *) msg; + cptvf->lfs.kcrypto_eng_grp_num = rsp_grp->eng_grp_num; + break; + case MBOX_MSG_GET_KVF_LIMITS: + rsp_limits = (struct otx2_cpt_kvf_limits_rsp *) msg; + cptvf->lfs.kvf_limits = rsp_limits->kvf_limits; + break; + default: + dev_err(&cptvf->pdev->dev, "Unsupported msg %d received.\n", + msg->id); + break; + } +} + +void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work) +{ + struct otx2_cptvf_dev *cptvf; + struct otx2_mbox *pfvf_mbox; + struct otx2_mbox_dev *mdev; + struct mbox_hdr *rsp_hdr; + struct mbox_msghdr *msg; + int offset, i; + + /* sync with mbox memory region */ + smp_rmb(); + + cptvf = container_of(work, struct otx2_cptvf_dev, pfvf_mbox_work); + pfvf_mbox = &cptvf->pfvf_mbox; + otx2_cpt_sync_mbox_bbuf(pfvf_mbox, 0); + mdev = &pfvf_mbox->dev[0]; + rsp_hdr = (struct mbox_hdr *)(mdev->mbase + pfvf_mbox->rx_start); + if (rsp_hdr->num_msgs == 0) + return; + offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN); + + for (i = 0; i < rsp_hdr->num_msgs; i++) { + msg = (struct mbox_msghdr *)(mdev->mbase + pfvf_mbox->rx_start + + offset); + process_pfvf_mbox_mbox_msg(cptvf, msg); + offset = msg->next_msgoff; + mdev->msgs_acked++; + } + otx2_mbox_reset(pfvf_mbox, 0); +} + +int otx2_cptvf_send_eng_grp_num_msg(struct otx2_cptvf_dev *cptvf, int eng_type) +{ + struct otx2_mbox *mbox = &cptvf->pfvf_mbox; + struct pci_dev *pdev = cptvf->pdev; + struct otx2_cpt_egrp_num_msg *req; + + req = (struct otx2_cpt_egrp_num_msg *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct otx2_cpt_egrp_num_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + req->hdr.id = MBOX_MSG_GET_ENG_GRP_NUM; + req->hdr.sig = OTX2_MBOX_REQ_SIG; + req->hdr.pcifunc = OTX2_CPT_RVU_PFFUNC(cptvf->vf_id, 0); + req->eng_type = eng_type; + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} + +int otx2_cptvf_send_kvf_limits_msg(struct otx2_cptvf_dev *cptvf) +{ + struct otx2_mbox *mbox = &cptvf->pfvf_mbox; + struct pci_dev *pdev = cptvf->pdev; + struct mbox_msghdr *req; + + req = (struct mbox_msghdr *) + otx2_mbox_alloc_msg_rsp(mbox, 0, sizeof(*req), + sizeof(struct otx2_cpt_kvf_limits_rsp)); + if (req == NULL) { + dev_err(&pdev->dev, "RVU MBOX failed to get message.\n"); + return -EFAULT; + } + req->id = MBOX_MSG_GET_KVF_LIMITS; + req->sig = OTX2_MBOX_REQ_SIG; + req->pcifunc = OTX2_CPT_RVU_PFFUNC(cptvf->vf_id, 0); + + return otx2_cpt_send_mbox_msg(mbox, pdev); +} diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c new file mode 100644 index 0000000000..811ded72ce --- /dev/null +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2020 Marvell. */ + +#include "otx2_cptvf.h" +#include "otx2_cpt_common.h" + +/* SG list header size in bytes */ +#define SG_LIST_HDR_SIZE 8 + +/* Default timeout when waiting for free pending entry in us */ +#define CPT_PENTRY_TIMEOUT 1000 +#define CPT_PENTRY_STEP 50 + +/* Default threshold for stopping and resuming sender requests */ +#define CPT_IQ_STOP_MARGIN 128 +#define CPT_IQ_RESUME_MARGIN 512 + +/* Default command timeout in seconds */ +#define CPT_COMMAND_TIMEOUT 4 +#define CPT_TIME_IN_RESET_COUNT 5 + +static void otx2_cpt_dump_sg_list(struct pci_dev *pdev, + struct otx2_cpt_req_info *req) +{ + int i; + + pr_debug("Gather list size %d\n", req->in_cnt); + for (i = 0; i < req->in_cnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->in[i].size, req->in[i].vptr, + (void *) req->in[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", + req->in[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->in[i].vptr, req->in[i].size, false); + } + pr_debug("Scatter list size %d\n", req->out_cnt); + for (i = 0; i < req->out_cnt; i++) { + pr_debug("Buffer %d size %d, vptr 0x%p, dmaptr 0x%p\n", i, + req->out[i].size, req->out[i].vptr, + (void *) req->out[i].dma_addr); + pr_debug("Buffer hexdump (%d bytes)\n", req->out[i].size); + print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, + req->out[i].vptr, req->out[i].size, false); + } +} + +static inline struct otx2_cpt_pending_entry *get_free_pending_entry( + struct otx2_cpt_pending_queue *q, + int qlen) +{ + struct otx2_cpt_pending_entry *ent = NULL; + + ent = &q->head[q->rear]; + if (unlikely(ent->busy)) + return NULL; + + q->rear++; + if (unlikely(q->rear == qlen)) + q->rear = 0; + + return ent; +} + +static inline u32 modulo_inc(u32 index, u32 length, u32 inc) +{ + if (WARN_ON(inc > length)) + inc = length; + + index += inc; + if (unlikely(index >= length)) + index -= length; + + return index; +} + +static inline void free_pentry(struct otx2_cpt_pending_entry *pentry) +{ + pentry->completion_addr = NULL; + pentry->info = NULL; + pentry->callback = NULL; + pentry->areq = NULL; + pentry->resume_sender = false; + pentry->busy = false; +} + +static inline int setup_sgio_components(struct pci_dev *pdev, + struct otx2_cpt_buf_ptr *list, + int buf_count, u8 *buffer) +{ + struct otx2_cpt_sglist_component *sg_ptr = NULL; + int ret = 0, i, j; + int components; + + if (unlikely(!list)) { + dev_err(&pdev->dev, "Input list pointer is NULL\n"); + return -EFAULT; + } + + for (i = 0; i < buf_count; i++) { + if (unlikely(!list[i].vptr)) + continue; + list[i].dma_addr = dma_map_single(&pdev->dev, list[i].vptr, + list[i].size, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, list[i].dma_addr))) { + dev_err(&pdev->dev, "Dma mapping failed\n"); + ret = -EIO; + goto sg_cleanup; + } + } + components = buf_count / 4; + sg_ptr = (struct otx2_cpt_sglist_component *)buffer; + for (i = 0; i < components; i++) { + sg_ptr->len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->len3 = cpu_to_be16(list[i * 4 + 3].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + sg_ptr->ptr3 = cpu_to_be64(list[i * 4 + 3].dma_addr); + sg_ptr++; + } + components = buf_count % 4; + + switch (components) { + case 3: + sg_ptr->len2 = cpu_to_be16(list[i * 4 + 2].size); + sg_ptr->ptr2 = cpu_to_be64(list[i * 4 + 2].dma_addr); + fallthrough; + case 2: + sg_ptr->len1 = cpu_to_be16(list[i * 4 + 1].size); + sg_ptr->ptr1 = cpu_to_be64(list[i * 4 + 1].dma_addr); + fallthrough; + case 1: + sg_ptr->len0 = cpu_to_be16(list[i * 4 + 0].size); + sg_ptr->ptr0 = cpu_to_be64(list[i * 4 + 0].dma_addr); + break; + default: + break; + } + return ret; + +sg_cleanup: + for (j = 0; j < i; j++) { + if (list[j].dma_addr) { + dma_unmap_single(&pdev->dev, list[j].dma_addr, + list[j].size, DMA_BIDIRECTIONAL); + } + + list[j].dma_addr = 0; + } + return ret; +} + +static inline struct otx2_cpt_inst_info *info_create(struct pci_dev *pdev, + struct otx2_cpt_req_info *req, + gfp_t gfp) +{ + int align = OTX2_CPT_DMA_MINALIGN; + struct otx2_cpt_inst_info *info; + u32 dlen, align_dlen, info_len; + u16 g_sz_bytes, s_sz_bytes; + u32 total_mem_len; + + if (unlikely(req->in_cnt > OTX2_CPT_MAX_SG_IN_CNT || + req->out_cnt > OTX2_CPT_MAX_SG_OUT_CNT)) { + dev_err(&pdev->dev, "Error too many sg components\n"); + return NULL; + } + + g_sz_bytes = ((req->in_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + s_sz_bytes = ((req->out_cnt + 3) / 4) * + sizeof(struct otx2_cpt_sglist_component); + + dlen = g_sz_bytes + s_sz_bytes + SG_LIST_HDR_SIZE; + align_dlen = ALIGN(dlen, align); + info_len = ALIGN(sizeof(*info), align); + total_mem_len = align_dlen + info_len + sizeof(union otx2_cpt_res_s); + + info = kzalloc(total_mem_len, gfp); + if (unlikely(!info)) + return NULL; + + info->dlen = dlen; + info->in_buffer = (u8 *)info + info_len; + + ((u16 *)info->in_buffer)[0] = req->out_cnt; + ((u16 *)info->in_buffer)[1] = req->in_cnt; + ((u16 *)info->in_buffer)[2] = 0; + ((u16 *)info->in_buffer)[3] = 0; + cpu_to_be64s((u64 *)info->in_buffer); + + /* Setup gather (input) components */ + if (setup_sgio_components(pdev, req->in, req->in_cnt, + &info->in_buffer[8])) { + dev_err(&pdev->dev, "Failed to setup gather list\n"); + goto destroy_info; + } + + if (setup_sgio_components(pdev, req->out, req->out_cnt, + &info->in_buffer[8 + g_sz_bytes])) { + dev_err(&pdev->dev, "Failed to setup scatter list\n"); + goto destroy_info; + } + + info->dma_len = total_mem_len - info_len; + info->dptr_baddr = dma_map_single(&pdev->dev, info->in_buffer, + info->dma_len, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(&pdev->dev, info->dptr_baddr))) { + dev_err(&pdev->dev, "DMA Mapping failed for cpt req\n"); + goto destroy_info; + } + /* + * Get buffer for union otx2_cpt_res_s response + * structure and its physical address + */ + info->completion_addr = info->in_buffer + align_dlen; + info->comp_baddr = info->dptr_baddr + align_dlen; + + return info; + +destroy_info: + otx2_cpt_info_destroy(pdev, info); + return NULL; +} + +static int process_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, + struct otx2_cpt_pending_queue *pqueue, + struct otx2_cptlf_info *lf) +{ + struct otx2_cptvf_request *cpt_req = &req->req; + struct otx2_cpt_pending_entry *pentry = NULL; + union otx2_cpt_ctrl_info *ctrl = &req->ctrl; + struct otx2_cpt_inst_info *info = NULL; + union otx2_cpt_res_s *result = NULL; + struct otx2_cpt_iq_command iq_cmd; + union otx2_cpt_inst_s cptinst; + int retry, ret = 0; + u8 resume_sender; + gfp_t gfp; + + gfp = (req->areq->flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : + GFP_ATOMIC; + if (unlikely(!otx2_cptlf_started(lf->lfs))) + return -ENODEV; + + info = info_create(pdev, req, gfp); + if (unlikely(!info)) { + dev_err(&pdev->dev, "Setting up cpt inst info failed"); + return -ENOMEM; + } + cpt_req->dlen = info->dlen; + + result = info->completion_addr; + result->s.compcode = OTX2_CPT_COMPLETION_CODE_INIT; + + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + retry = CPT_PENTRY_TIMEOUT / CPT_PENTRY_STEP; + while (unlikely(!pentry) && retry--) { + spin_unlock_bh(&pqueue->lock); + udelay(CPT_PENTRY_STEP); + spin_lock_bh(&pqueue->lock); + pentry = get_free_pending_entry(pqueue, pqueue->qlen); + } + + if (unlikely(!pentry)) { + ret = -ENOSPC; + goto destroy_info; + } + + /* + * Check if we are close to filling in entire pending queue, + * if so then tell the sender to stop/sleep by returning -EBUSY + * We do it only for context which can sleep (GFP_KERNEL) + */ + if (gfp == GFP_KERNEL && + pqueue->pending_count > (pqueue->qlen - CPT_IQ_STOP_MARGIN)) { + pentry->resume_sender = true; + } else + pentry->resume_sender = false; + resume_sender = pentry->resume_sender; + pqueue->pending_count++; + + pentry->completion_addr = info->completion_addr; + pentry->info = info; + pentry->callback = req->callback; + pentry->areq = req->areq; + pentry->busy = true; + info->pentry = pentry; + info->time_in = jiffies; + info->req = req; + + /* Fill in the command */ + iq_cmd.cmd.u = 0; + iq_cmd.cmd.s.opcode = cpu_to_be16(cpt_req->opcode.flags); + iq_cmd.cmd.s.param1 = cpu_to_be16(cpt_req->param1); + iq_cmd.cmd.s.param2 = cpu_to_be16(cpt_req->param2); + iq_cmd.cmd.s.dlen = cpu_to_be16(cpt_req->dlen); + + /* 64-bit swap for microcode data reads, not needed for addresses*/ + cpu_to_be64s(&iq_cmd.cmd.u); + iq_cmd.dptr = info->dptr_baddr; + iq_cmd.rptr = 0; + iq_cmd.cptr.u = 0; + iq_cmd.cptr.s.grp = ctrl->s.grp; + + /* Fill in the CPT_INST_S type command for HW interpretation */ + otx2_cpt_fill_inst(&cptinst, &iq_cmd, info->comp_baddr); + + /* Print debug info if enabled */ + otx2_cpt_dump_sg_list(pdev, req); + pr_debug("Cpt_inst_s hexdump (%d bytes)\n", OTX2_CPT_INST_SIZE); + print_hex_dump_debug("", 0, 16, 1, &cptinst, OTX2_CPT_INST_SIZE, false); + pr_debug("Dptr hexdump (%d bytes)\n", cpt_req->dlen); + print_hex_dump_debug("", 0, 16, 1, info->in_buffer, + cpt_req->dlen, false); + + /* Send CPT command */ + lf->lfs->ops->send_cmd(&cptinst, 1, lf); + + /* + * We allocate and prepare pending queue entry in critical section + * together with submitting CPT instruction to CPT instruction queue + * to make sure that order of CPT requests is the same in both + * pending and instruction queues + */ + spin_unlock_bh(&pqueue->lock); + + ret = resume_sender ? -EBUSY : -EINPROGRESS; + return ret; + +destroy_info: + spin_unlock_bh(&pqueue->lock); + otx2_cpt_info_destroy(pdev, info); + return ret; +} + +int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req, + int cpu_num) +{ + struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); + struct otx2_cptlfs_info *lfs = &cptvf->lfs; + + return process_request(lfs->pdev, req, &lfs->lf[cpu_num].pqueue, + &lfs->lf[cpu_num]); +} + +static int cpt_process_ccode(struct otx2_cptlfs_info *lfs, + union otx2_cpt_res_s *cpt_status, + struct otx2_cpt_inst_info *info, + u32 *res_code) +{ + u8 uc_ccode = lfs->ops->cpt_get_uc_compcode(cpt_status); + u8 ccode = lfs->ops->cpt_get_compcode(cpt_status); + struct pci_dev *pdev = lfs->pdev; + + switch (ccode) { + case OTX2_CPT_COMP_E_FAULT: + dev_err(&pdev->dev, + "Request failed with DMA fault\n"); + otx2_cpt_dump_sg_list(pdev, info->req); + break; + + case OTX2_CPT_COMP_E_HWERR: + dev_err(&pdev->dev, + "Request failed with hardware error\n"); + otx2_cpt_dump_sg_list(pdev, info->req); + break; + + case OTX2_CPT_COMP_E_INSTERR: + dev_err(&pdev->dev, + "Request failed with instruction error\n"); + otx2_cpt_dump_sg_list(pdev, info->req); + break; + + case OTX2_CPT_COMP_E_NOTDONE: + /* check for timeout */ + if (time_after_eq(jiffies, info->time_in + + CPT_COMMAND_TIMEOUT * HZ)) + dev_warn(&pdev->dev, + "Request timed out 0x%p", info->req); + else if (info->extra_time < CPT_TIME_IN_RESET_COUNT) { + info->time_in = jiffies; + info->extra_time++; + } + return 1; + + case OTX2_CPT_COMP_E_GOOD: + case OTX2_CPT_COMP_E_WARN: + /* + * Check microcode completion code, it is only valid + * when completion code is CPT_COMP_E::GOOD + */ + if (uc_ccode != OTX2_CPT_UCC_SUCCESS) { + /* + * If requested hmac is truncated and ucode returns + * s/g write length error then we report success + * because ucode writes as many bytes of calculated + * hmac as available in gather buffer and reports + * s/g write length error if number of bytes in gather + * buffer is less than full hmac size. + */ + if (info->req->is_trunc_hmac && + uc_ccode == OTX2_CPT_UCC_SG_WRITE_LENGTH) { + *res_code = 0; + break; + } + + dev_err(&pdev->dev, + "Request failed with software error code 0x%x\n", + cpt_status->s.uc_compcode); + otx2_cpt_dump_sg_list(pdev, info->req); + break; + } + /* Request has been processed with success */ + *res_code = 0; + break; + + default: + dev_err(&pdev->dev, + "Request returned invalid status %d\n", ccode); + break; + } + return 0; +} + +static inline void process_pending_queue(struct otx2_cptlfs_info *lfs, + struct otx2_cpt_pending_queue *pqueue) +{ + struct otx2_cpt_pending_entry *resume_pentry = NULL; + void (*callback)(int status, void *arg, void *req); + struct otx2_cpt_pending_entry *pentry = NULL; + union otx2_cpt_res_s *cpt_status = NULL; + struct otx2_cpt_inst_info *info = NULL; + struct otx2_cpt_req_info *req = NULL; + struct crypto_async_request *areq; + struct pci_dev *pdev = lfs->pdev; + u32 res_code, resume_index; + + while (1) { + spin_lock_bh(&pqueue->lock); + pentry = &pqueue->head[pqueue->front]; + + if (WARN_ON(!pentry)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + res_code = -EINVAL; + if (unlikely(!pentry->busy)) { + spin_unlock_bh(&pqueue->lock); + break; + } + + if (unlikely(!pentry->callback)) { + dev_err(&pdev->dev, "Callback NULL\n"); + goto process_pentry; + } + + info = pentry->info; + if (unlikely(!info)) { + dev_err(&pdev->dev, "Pending entry post arg NULL\n"); + goto process_pentry; + } + + req = info->req; + if (unlikely(!req)) { + dev_err(&pdev->dev, "Request NULL\n"); + goto process_pentry; + } + + cpt_status = pentry->completion_addr; + if (unlikely(!cpt_status)) { + dev_err(&pdev->dev, "Completion address NULL\n"); + goto process_pentry; + } + + if (cpt_process_ccode(lfs, cpt_status, info, &res_code)) { + spin_unlock_bh(&pqueue->lock); + return; + } + info->pdev = pdev; + +process_pentry: + /* + * Check if we should inform sending side to resume + * We do it CPT_IQ_RESUME_MARGIN elements in advance before + * pending queue becomes empty + */ + resume_index = modulo_inc(pqueue->front, pqueue->qlen, + CPT_IQ_RESUME_MARGIN); + resume_pentry = &pqueue->head[resume_index]; + if (resume_pentry && + resume_pentry->resume_sender) { + resume_pentry->resume_sender = false; + callback = resume_pentry->callback; + areq = resume_pentry->areq; + + if (callback) { + spin_unlock_bh(&pqueue->lock); + + /* + * EINPROGRESS is an indication for sending + * side that it can resume sending requests + */ + callback(-EINPROGRESS, areq, info); + spin_lock_bh(&pqueue->lock); + } + } + + callback = pentry->callback; + areq = pentry->areq; + free_pentry(pentry); + + pqueue->pending_count--; + pqueue->front = modulo_inc(pqueue->front, pqueue->qlen, 1); + spin_unlock_bh(&pqueue->lock); + + /* + * Call callback after current pending entry has been + * processed, we don't do it if the callback pointer is + * invalid. + */ + if (callback) + callback(res_code, areq, info); + } +} + +void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe) +{ + process_pending_queue(wqe->lfs, + &wqe->lfs->lf[wqe->lf_num].pqueue); +} + +int otx2_cpt_get_kcrypto_eng_grp_num(struct pci_dev *pdev) +{ + struct otx2_cptvf_dev *cptvf = pci_get_drvdata(pdev); + + return cptvf->lfs.kcrypto_eng_grp_num; +} |