diff options
Diffstat (limited to 'drivers/crypto')
120 files changed, 10214 insertions, 6968 deletions
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 0991f026cb..94f23c6fc9 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -67,6 +67,7 @@ config CRYPTO_DEV_GEODE config ZCRYPT tristate "Support for s390 cryptographic adapters" depends on S390 + depends on AP select HW_RANDOM help Select this option if you want to enable support for @@ -74,23 +75,6 @@ config ZCRYPT to 8 in Coprocessor (CEXxC), EP11 Coprocessor (CEXxP) or Accelerator (CEXxA) mode. -config ZCRYPT_DEBUG - bool "Enable debug features for s390 cryptographic adapters" - default n - depends on DEBUG_KERNEL - depends on ZCRYPT - help - Say 'Y' here to enable some additional debug features on the - s390 cryptographic adapters driver. - - There will be some more sysfs attributes displayed for ap cards - and queues and some flags on crypto requests are interpreted as - debugging messages to force error injection. - - Do not enable on production level kernel build. - - If unsure, say N. - config PKEY tristate "Kernel API for protected key handling" depends on S390 @@ -611,13 +595,13 @@ config CRYPTO_DEV_QCOM_RNG To compile this driver as a module, choose M here. The module will be called qcom-rng. If unsure, say N. -config CRYPTO_DEV_VMX - bool "Support for VMX cryptographic acceleration instructions" - depends on PPC64 && VSX - help - Support for VMX cryptographic acceleration instructions. - -source "drivers/crypto/vmx/Kconfig" +#config CRYPTO_DEV_VMX +# bool "Support for VMX cryptographic acceleration instructions" +# depends on PPC64 && VSX +# help +# Support for VMX cryptographic acceleration instructions. +# +#source "drivers/crypto/vmx/Kconfig" config CRYPTO_DEV_IMGTEC_HASH tristate "Imagination Technologies hardware hash accelerator" @@ -660,6 +644,14 @@ config CRYPTO_DEV_ROCKCHIP_DEBUG This will create /sys/kernel/debug/rk3288_crypto/stats for displaying the number of requests per algorithm and other internal stats. 
+config CRYPTO_DEV_TEGRA + tristate "Enable Tegra Security Engine" + depends on TEGRA_HOST1X + select CRYPTO_ENGINE + + help + Select this to enable Tegra Security Engine which accelerates various + AES encryption/decryption and HASH algorithms. config CRYPTO_DEV_ZYNQMP_AES tristate "Support for Xilinx ZynqMP AES hw accelerator" diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index d859d6a5f3..ad4ccef67d 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -41,8 +41,9 @@ obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ obj-y += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o +obj-$(CONFIG_CRYPTO_DEV_TEGRA) += tegra/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ -obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ +#obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_BCM_SPU) += bcm/ obj-$(CONFIG_CRYPTO_DEV_SAFEXCEL) += inside-secure/ obj-$(CONFIG_CRYPTO_DEV_ARTPEC6) += axis/ diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c index d358334e59..ee2a28c906 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-hash.c @@ -362,7 +362,7 @@ int sun8i_ce_hash_run(struct crypto_engine *engine, void *breq) digestsize = SHA512_DIGEST_SIZE; /* the padding could be up to two block. 
*/ - buf = kzalloc(bs * 2, GFP_KERNEL | GFP_DMA); + buf = kcalloc(2, bs, GFP_KERNEL | GFP_DMA); if (!buf) { err = -ENOMEM; goto theend; diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c index 83a9093eff..a895e4289e 100644 --- a/drivers/crypto/atmel-i2c.c +++ b/drivers/crypto/atmel-i2c.c @@ -51,7 +51,7 @@ static void atmel_i2c_checksum(struct atmel_i2c_cmd *cmd) *__crc16 = cpu_to_le16(bitrev16(crc16(0, data, len))); } -void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd) +void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd) { cmd->word_addr = COMMAND; cmd->opcode = OPCODE_READ; @@ -68,7 +68,31 @@ void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd) cmd->msecs = MAX_EXEC_TIME_READ; cmd->rxsize = READ_RSP_SIZE; } -EXPORT_SYMBOL(atmel_i2c_init_read_cmd); +EXPORT_SYMBOL(atmel_i2c_init_read_config_cmd); + +int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr) +{ + if (addr < 0 || addr > OTP_ZONE_SIZE) + return -1; + + cmd->word_addr = COMMAND; + cmd->opcode = OPCODE_READ; + /* + * Read the word from OTP zone that may contain e.g. 
serial + * numbers or similar if persistently pre-initialized and locked + */ + cmd->param1 = OTP_ZONE; + cmd->param2 = cpu_to_le16(addr); + cmd->count = READ_COUNT; + + atmel_i2c_checksum(cmd); + + cmd->msecs = MAX_EXEC_TIME_READ; + cmd->rxsize = READ_RSP_SIZE; + + return 0; +} +EXPORT_SYMBOL(atmel_i2c_init_read_otp_cmd); void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd) { @@ -301,7 +325,7 @@ static int device_sanity_check(struct i2c_client *client) if (!cmd) return -ENOMEM; - atmel_i2c_init_read_cmd(cmd); + atmel_i2c_init_read_config_cmd(cmd); ret = atmel_i2c_send_receive(client, cmd); if (ret) diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h index c0bd429ee2..72f04c1568 100644 --- a/drivers/crypto/atmel-i2c.h +++ b/drivers/crypto/atmel-i2c.h @@ -64,6 +64,10 @@ struct atmel_i2c_cmd { /* Definitions for eeprom organization */ #define CONFIGURATION_ZONE 0 +#define OTP_ZONE 1 + +/* Definitions for eeprom zone sizes */ +#define OTP_ZONE_SIZE 64 /* Definitions for Indexes common to all commands */ #define RSP_DATA_IDX 1 /* buffer index of data in response */ @@ -124,6 +128,7 @@ struct atmel_ecc_driver_data { * @wake_token : wake token array of zeros * @wake_token_sz : size in bytes of the wake_token * @tfm_count : number of active crypto transformations on i2c client + * @hwrng : hold the hardware generated rng * * Reads and writes from/to the i2c client are sequential. The first byte * transmitted to the device is treated as the byte size. 
Any attempt to send @@ -177,7 +182,8 @@ void atmel_i2c_flush_queue(void); int atmel_i2c_send_receive(struct i2c_client *client, struct atmel_i2c_cmd *cmd); -void atmel_i2c_init_read_cmd(struct atmel_i2c_cmd *cmd); +void atmel_i2c_init_read_config_cmd(struct atmel_i2c_cmd *cmd); +int atmel_i2c_init_read_otp_cmd(struct atmel_i2c_cmd *cmd, u16 addr); void atmel_i2c_init_random_cmd(struct atmel_i2c_cmd *cmd); void atmel_i2c_init_genkey_cmd(struct atmel_i2c_cmd *cmd, u16 keyid); int atmel_i2c_init_ecdh_cmd(struct atmel_i2c_cmd *cmd, diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index c77f482d2a..2034f60315 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -91,6 +91,62 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max, return max; } +static int atmel_sha204a_otp_read(struct i2c_client *client, u16 addr, u8 *otp) +{ + struct atmel_i2c_cmd cmd; + int ret = -1; + + if (atmel_i2c_init_read_otp_cmd(&cmd, addr) < 0) { + dev_err(&client->dev, "failed, invalid otp address %04X\n", + addr); + return ret; + } + + ret = atmel_i2c_send_receive(client, &cmd); + + if (cmd.data[0] == 0xff) { + dev_err(&client->dev, "failed, device not ready\n"); + return -EINVAL; + } + + memcpy(otp, cmd.data+1, 4); + + return ret; +} + +static ssize_t otp_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + u16 addr; + u8 otp[OTP_ZONE_SIZE]; + char *str = buf; + struct i2c_client *client = to_i2c_client(dev); + int i; + + for (addr = 0; addr < OTP_ZONE_SIZE/4; addr++) { + if (atmel_sha204a_otp_read(client, addr, otp + addr * 4) < 0) { + dev_err(dev, "failed to read otp zone\n"); + break; + } + } + + for (i = 0; i < addr*2; i++) + str += sprintf(str, "%02X", otp[i]); + str += sprintf(str, "\n"); + return str - buf; +} +static DEVICE_ATTR_RO(otp); + +static struct attribute *atmel_sha204a_attrs[] = { + &dev_attr_otp.attr, + NULL +}; + +static const struct attribute_group atmel_sha204a_groups 
= { + .name = "atsha204a", + .attrs = atmel_sha204a_attrs, +}; + static int atmel_sha204a_probe(struct i2c_client *client) { struct atmel_i2c_client_priv *i2c_priv; @@ -111,6 +167,16 @@ static int atmel_sha204a_probe(struct i2c_client *client) if (ret) dev_warn(&client->dev, "failed to register RNG (%d)\n", ret); + /* otp read out */ + if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) + return -ENODEV; + + ret = sysfs_create_group(&client->dev.kobj, &atmel_sha204a_groups); + if (ret) { + dev_err(&client->dev, "failed to register sysfs entry\n"); + return ret; + } + return ret; } @@ -123,6 +189,8 @@ static void atmel_sha204a_remove(struct i2c_client *client) return; } + sysfs_remove_group(&client->dev.kobj, &atmel_sha204a_groups); + kfree((void *)i2c_priv->hwrng.priv); } diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index bdf367f3f6..bd418dea58 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -512,6 +512,7 @@ static const struct of_device_id caam_match[] = { MODULE_DEVICE_TABLE(of, caam_match); struct caam_imx_data { + bool page0_access; const struct clk_bulk_data *clks; int num_clks; }; @@ -524,6 +525,7 @@ static const struct clk_bulk_data caam_imx6_clks[] = { }; static const struct caam_imx_data caam_imx6_data = { + .page0_access = true, .clks = caam_imx6_clks, .num_clks = ARRAY_SIZE(caam_imx6_clks), }; @@ -534,6 +536,7 @@ static const struct clk_bulk_data caam_imx7_clks[] = { }; static const struct caam_imx_data caam_imx7_data = { + .page0_access = true, .clks = caam_imx7_clks, .num_clks = ARRAY_SIZE(caam_imx7_clks), }; @@ -545,6 +548,7 @@ static const struct clk_bulk_data caam_imx6ul_clks[] = { }; static const struct caam_imx_data caam_imx6ul_data = { + .page0_access = true, .clks = caam_imx6ul_clks, .num_clks = ARRAY_SIZE(caam_imx6ul_clks), }; @@ -554,15 +558,19 @@ static const struct clk_bulk_data caam_vf610_clks[] = { }; static const struct caam_imx_data caam_vf610_data = { + .page0_access = true, 
.clks = caam_vf610_clks, .num_clks = ARRAY_SIZE(caam_vf610_clks), }; +static const struct caam_imx_data caam_imx8ulp_data; + static const struct soc_device_attribute caam_imx_soc_table[] = { { .soc_id = "i.MX6UL", .data = &caam_imx6ul_data }, { .soc_id = "i.MX6*", .data = &caam_imx6_data }, { .soc_id = "i.MX7*", .data = &caam_imx7_data }, { .soc_id = "i.MX8M*", .data = &caam_imx7_data }, + { .soc_id = "i.MX8ULP", .data = &caam_imx8ulp_data }, { .soc_id = "VF*", .data = &caam_vf610_data }, { .family = "Freescale i.MX" }, { /* sentinel */ } @@ -860,6 +868,7 @@ static int caam_probe(struct platform_device *pdev) int pg_size; int BLOCK_OFFSET = 0; bool reg_access = true; + const struct caam_imx_data *imx_soc_data; ctrlpriv = devm_kzalloc(&pdev->dev, sizeof(*ctrlpriv), GFP_KERNEL); if (!ctrlpriv) @@ -894,12 +903,20 @@ static int caam_probe(struct platform_device *pdev) return -EINVAL; } + imx_soc_data = imx_soc_match->data; + reg_access = reg_access && imx_soc_data->page0_access; + /* + * CAAM clocks cannot be controlled from kernel. + */ + if (!imx_soc_data->num_clks) + goto iomap_ctrl; + ret = init_clocks(dev, imx_soc_match->data); if (ret) return ret; } - +iomap_ctrl: /* Get configuration properties from device tree */ /* First, get register page */ ctrl = devm_of_iomap(dev, nprop, 0, NULL); diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig index 32268e239b..f394e45e11 100644 --- a/drivers/crypto/ccp/Kconfig +++ b/drivers/crypto/ccp/Kconfig @@ -38,7 +38,7 @@ config CRYPTO_DEV_CCP_CRYPTO config CRYPTO_DEV_SP_PSP bool "Platform Security Processor (PSP) device" default y - depends on CRYPTO_DEV_CCP_DD && X86_64 + depends on CRYPTO_DEV_CCP_DD && X86_64 && AMD_IOMMU help Provide support for the AMD Platform Security Processor (PSP). 
The PSP is a dedicated processor that provides support for key diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c index 124a2e0c89..56bf832c29 100644 --- a/drivers/crypto/ccp/psp-dev.c +++ b/drivers/crypto/ccp/psp-dev.c @@ -156,11 +156,14 @@ static unsigned int psp_get_capability(struct psp_device *psp) } psp->capability = val; - /* Detect if TSME and SME are both enabled */ + /* Detect TSME and/or SME status */ if (PSP_CAPABILITY(psp, PSP_SECURITY_REPORTING) && - psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET) && - cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) - dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n"); + psp->capability & (PSP_SECURITY_TSME_STATUS << PSP_CAPABILITY_PSP_SECURITY_OFFSET)) { + if (cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + dev_notice(psp->dev, "psp: Both TSME and SME are active, SME is unnecessary when TSME is active.\n"); + else + dev_notice(psp->dev, "psp: TSME enabled\n"); + } return 0; } diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index b04bc1d3d6..1912bee22d 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -21,14 +21,18 @@ #include <linux/hw_random.h> #include <linux/ccp.h> #include <linux/firmware.h> +#include <linux/panic_notifier.h> #include <linux/gfp.h> #include <linux/cpufeature.h> #include <linux/fs.h> #include <linux/fs_struct.h> #include <linux/psp.h> +#include <linux/amd-iommu.h> #include <asm/smp.h> #include <asm/cacheflush.h> +#include <asm/e820/types.h> +#include <asm/sev.h> #include "psp-dev.h" #include "sev-dev.h" @@ -37,6 +41,19 @@ #define SEV_FW_FILE "amd/sev.fw" #define SEV_FW_NAME_SIZE 64 +/* Minimum firmware version required for the SEV-SNP support */ +#define SNP_MIN_API_MAJOR 1 +#define SNP_MIN_API_MINOR 51 + +/* + * Maximum number of firmware-writable buffers that might be specified + * in the parameters of a legacy SEV command buffer. 
+ */ +#define CMD_BUF_FW_WRITABLE_MAX 2 + +/* Leave room in the descriptor array for an end-of-list indicator. */ +#define CMD_BUF_DESC_MAX (CMD_BUF_FW_WRITABLE_MAX + 1) + static DEFINE_MUTEX(sev_cmd_mutex); static struct sev_misc_dev *misc_dev; @@ -68,9 +85,14 @@ static int psp_timeout; * The TMR is a 1MB area that must be 1MB aligned. Use the page allocator * to allocate the memory, which will return aligned memory for the specified * allocation order. + * + * When SEV-SNP is enabled the TMR needs to be 2MB aligned and 2MB sized. */ -#define SEV_ES_TMR_SIZE (1024 * 1024) +#define SEV_TMR_SIZE (1024 * 1024) +#define SNP_TMR_SIZE (2 * 1024 * 1024) + static void *sev_es_tmr; +static size_t sev_es_tmr_size = SEV_TMR_SIZE; /* INIT_EX NV Storage: * The NV Storage is a 32Kb area and must be 4Kb page aligned. Use the page @@ -80,6 +102,13 @@ static void *sev_es_tmr; #define NV_LENGTH (32 * 1024) static void *sev_init_ex_buffer; +/* + * SEV_DATA_RANGE_LIST: + * Array containing range of pages that firmware transitions to HV-fixed + * page state. + */ +static struct sev_data_range_list *snp_range_list; + static inline bool sev_version_greater_or_equal(u8 maj, u8 min) { struct sev_device *sev = psp_master->sev_data; @@ -115,6 +144,25 @@ static int sev_wait_cmd_ioc(struct sev_device *sev, { int ret; + /* + * If invoked during panic handling, local interrupts are disabled, + * so the PSP command completion interrupt can't be used. Poll for + * PSP command completion instead. 
+ */ + if (irqs_disabled()) { + unsigned long timeout_usecs = (timeout * USEC_PER_SEC) / 10; + + /* Poll for SEV command completion: */ + while (timeout_usecs--) { + *reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg); + if (*reg & PSP_CMDRESP_RESP) + return 0; + + udelay(10); + } + return -ETIMEDOUT; + } + ret = wait_event_timeout(sev->int_queue, sev->int_rcvd, timeout * HZ); if (!ret) @@ -130,6 +178,8 @@ static int sev_cmd_buffer_len(int cmd) switch (cmd) { case SEV_CMD_INIT: return sizeof(struct sev_data_init); case SEV_CMD_INIT_EX: return sizeof(struct sev_data_init_ex); + case SEV_CMD_SNP_SHUTDOWN_EX: return sizeof(struct sev_data_snp_shutdown_ex); + case SEV_CMD_SNP_INIT_EX: return sizeof(struct sev_data_snp_init_ex); case SEV_CMD_PLATFORM_STATUS: return sizeof(struct sev_user_data_status); case SEV_CMD_PEK_CSR: return sizeof(struct sev_data_pek_csr); case SEV_CMD_PEK_CERT_IMPORT: return sizeof(struct sev_data_pek_cert_import); @@ -158,23 +208,27 @@ static int sev_cmd_buffer_len(int cmd) case SEV_CMD_GET_ID: return sizeof(struct sev_data_get_id); case SEV_CMD_ATTESTATION_REPORT: return sizeof(struct sev_data_attestation_report); case SEV_CMD_SEND_CANCEL: return sizeof(struct sev_data_send_cancel); + case SEV_CMD_SNP_GCTX_CREATE: return sizeof(struct sev_data_snp_addr); + case SEV_CMD_SNP_LAUNCH_START: return sizeof(struct sev_data_snp_launch_start); + case SEV_CMD_SNP_LAUNCH_UPDATE: return sizeof(struct sev_data_snp_launch_update); + case SEV_CMD_SNP_ACTIVATE: return sizeof(struct sev_data_snp_activate); + case SEV_CMD_SNP_DECOMMISSION: return sizeof(struct sev_data_snp_addr); + case SEV_CMD_SNP_PAGE_RECLAIM: return sizeof(struct sev_data_snp_page_reclaim); + case SEV_CMD_SNP_GUEST_STATUS: return sizeof(struct sev_data_snp_guest_status); + case SEV_CMD_SNP_LAUNCH_FINISH: return sizeof(struct sev_data_snp_launch_finish); + case SEV_CMD_SNP_DBG_DECRYPT: return sizeof(struct sev_data_snp_dbg); + case SEV_CMD_SNP_DBG_ENCRYPT: return sizeof(struct 
sev_data_snp_dbg); + case SEV_CMD_SNP_PAGE_UNSMASH: return sizeof(struct sev_data_snp_page_unsmash); + case SEV_CMD_SNP_PLATFORM_STATUS: return sizeof(struct sev_data_snp_addr); + case SEV_CMD_SNP_GUEST_REQUEST: return sizeof(struct sev_data_snp_guest_request); + case SEV_CMD_SNP_CONFIG: return sizeof(struct sev_user_data_snp_config); + case SEV_CMD_SNP_COMMIT: return sizeof(struct sev_data_snp_commit); default: return 0; } return 0; } -static void *sev_fw_alloc(unsigned long len) -{ - struct page *page; - - page = alloc_pages(GFP_KERNEL, get_order(len)); - if (!page) - return NULL; - - return page_address(page); -} - static struct file *open_file_as_root(const char *filename, int flags, umode_t mode) { struct file *fp; @@ -305,13 +359,485 @@ static int sev_write_init_ex_file_if_required(int cmd_id) return sev_write_init_ex_file(); } +/* + * snp_reclaim_pages() needs __sev_do_cmd_locked(), and __sev_do_cmd_locked() + * needs snp_reclaim_pages(), so a forward declaration is needed. + */ +static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret); + +static int snp_reclaim_pages(unsigned long paddr, unsigned int npages, bool locked) +{ + int ret, err, i; + + paddr = __sme_clr(ALIGN_DOWN(paddr, PAGE_SIZE)); + + for (i = 0; i < npages; i++, paddr += PAGE_SIZE) { + struct sev_data_snp_page_reclaim data = {0}; + + data.paddr = paddr; + + if (locked) + ret = __sev_do_cmd_locked(SEV_CMD_SNP_PAGE_RECLAIM, &data, &err); + else + ret = sev_do_cmd(SEV_CMD_SNP_PAGE_RECLAIM, &data, &err); + + if (ret) + goto cleanup; + + ret = rmp_make_shared(__phys_to_pfn(paddr), PG_LEVEL_4K); + if (ret) + goto cleanup; + } + + return 0; + +cleanup: + /* + * If there was a failure reclaiming the page then it is no longer safe + * to release it back to the system; leak it instead. 
+ */ + snp_leak_pages(__phys_to_pfn(paddr), npages - i); + return ret; +} + +static int rmp_mark_pages_firmware(unsigned long paddr, unsigned int npages, bool locked) +{ + unsigned long pfn = __sme_clr(paddr) >> PAGE_SHIFT; + int rc, i; + + for (i = 0; i < npages; i++, pfn++) { + rc = rmp_make_private(pfn, 0, PG_LEVEL_4K, 0, true); + if (rc) + goto cleanup; + } + + return 0; + +cleanup: + /* + * Try unrolling the firmware state changes by + * reclaiming the pages which were already changed to the + * firmware state. + */ + snp_reclaim_pages(paddr, i, locked); + + return rc; +} + +static struct page *__snp_alloc_firmware_pages(gfp_t gfp_mask, int order) +{ + unsigned long npages = 1ul << order, paddr; + struct sev_device *sev; + struct page *page; + + if (!psp_master || !psp_master->sev_data) + return NULL; + + page = alloc_pages(gfp_mask, order); + if (!page) + return NULL; + + /* If SEV-SNP is initialized then add the page in RMP table. */ + sev = psp_master->sev_data; + if (!sev->snp_initialized) + return page; + + paddr = __pa((unsigned long)page_address(page)); + if (rmp_mark_pages_firmware(paddr, npages, false)) + return NULL; + + return page; +} + +void *snp_alloc_firmware_page(gfp_t gfp_mask) +{ + struct page *page; + + page = __snp_alloc_firmware_pages(gfp_mask, 0); + + return page ? 
page_address(page) : NULL; +} +EXPORT_SYMBOL_GPL(snp_alloc_firmware_page); + +static void __snp_free_firmware_pages(struct page *page, int order, bool locked) +{ + struct sev_device *sev = psp_master->sev_data; + unsigned long paddr, npages = 1ul << order; + + if (!page) + return; + + paddr = __pa((unsigned long)page_address(page)); + if (sev->snp_initialized && + snp_reclaim_pages(paddr, npages, locked)) + return; + + __free_pages(page, order); +} + +void snp_free_firmware_page(void *addr) +{ + if (!addr) + return; + + __snp_free_firmware_pages(virt_to_page(addr), 0, false); +} +EXPORT_SYMBOL_GPL(snp_free_firmware_page); + +static void *sev_fw_alloc(unsigned long len) +{ + struct page *page; + + page = __snp_alloc_firmware_pages(GFP_KERNEL, get_order(len)); + if (!page) + return NULL; + + return page_address(page); +} + +/** + * struct cmd_buf_desc - descriptors for managing legacy SEV command address + * parameters corresponding to buffers that may be written to by firmware. + * + * @paddr_ptr: pointer to the address parameter in the command buffer which may + * need to be saved/restored depending on whether a bounce buffer + * is used. In the case of a bounce buffer, the command buffer + * needs to be updated with the address of the new bounce buffer + * snp_map_cmd_buf_desc() has allocated specifically for it. Must + * be NULL if this descriptor is only an end-of-list indicator. + * + * @paddr_orig: storage for the original address parameter, which can be used to + * restore the original value in @paddr_ptr in cases where it is + * replaced with the address of a bounce buffer. + * + * @len: length of buffer located at the address originally stored at @paddr_ptr + * + * @guest_owned: true if the address corresponds to guest-owned pages, in which + * case bounce buffers are not needed. 
+ */ +struct cmd_buf_desc { + u64 *paddr_ptr; + u64 paddr_orig; + u32 len; + bool guest_owned; +}; + +/* + * If a legacy SEV command parameter is a memory address, those pages in + * turn need to be transitioned to/from firmware-owned before/after + * executing the firmware command. + * + * Additionally, in cases where those pages are not guest-owned, a bounce + * buffer is needed in place of the original memory address parameter. + * + * A set of descriptors are used to keep track of this handling, and + * initialized here based on the specific commands being executed. + */ +static void snp_populate_cmd_buf_desc_list(int cmd, void *cmd_buf, + struct cmd_buf_desc *desc_list) +{ + switch (cmd) { + case SEV_CMD_PDH_CERT_EXPORT: { + struct sev_data_pdh_cert_export *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->pdh_cert_address; + desc_list[0].len = data->pdh_cert_len; + desc_list[1].paddr_ptr = &data->cert_chain_address; + desc_list[1].len = data->cert_chain_len; + break; + } + case SEV_CMD_GET_ID: { + struct sev_data_get_id *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->address; + desc_list[0].len = data->len; + break; + } + case SEV_CMD_PEK_CSR: { + struct sev_data_pek_csr *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->address; + desc_list[0].len = data->len; + break; + } + case SEV_CMD_LAUNCH_UPDATE_DATA: { + struct sev_data_launch_update_data *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->address; + desc_list[0].len = data->len; + desc_list[0].guest_owned = true; + break; + } + case SEV_CMD_LAUNCH_UPDATE_VMSA: { + struct sev_data_launch_update_vmsa *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->address; + desc_list[0].len = data->len; + desc_list[0].guest_owned = true; + break; + } + case SEV_CMD_LAUNCH_MEASURE: { + struct sev_data_launch_measure *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->address; + desc_list[0].len = data->len; + break; + } + case SEV_CMD_LAUNCH_UPDATE_SECRET: { + struct sev_data_launch_secret *data = 
cmd_buf; + + desc_list[0].paddr_ptr = &data->guest_address; + desc_list[0].len = data->guest_len; + desc_list[0].guest_owned = true; + break; + } + case SEV_CMD_DBG_DECRYPT: { + struct sev_data_dbg *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->dst_addr; + desc_list[0].len = data->len; + desc_list[0].guest_owned = true; + break; + } + case SEV_CMD_DBG_ENCRYPT: { + struct sev_data_dbg *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->dst_addr; + desc_list[0].len = data->len; + desc_list[0].guest_owned = true; + break; + } + case SEV_CMD_ATTESTATION_REPORT: { + struct sev_data_attestation_report *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->address; + desc_list[0].len = data->len; + break; + } + case SEV_CMD_SEND_START: { + struct sev_data_send_start *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->session_address; + desc_list[0].len = data->session_len; + break; + } + case SEV_CMD_SEND_UPDATE_DATA: { + struct sev_data_send_update_data *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->hdr_address; + desc_list[0].len = data->hdr_len; + desc_list[1].paddr_ptr = &data->trans_address; + desc_list[1].len = data->trans_len; + break; + } + case SEV_CMD_SEND_UPDATE_VMSA: { + struct sev_data_send_update_vmsa *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->hdr_address; + desc_list[0].len = data->hdr_len; + desc_list[1].paddr_ptr = &data->trans_address; + desc_list[1].len = data->trans_len; + break; + } + case SEV_CMD_RECEIVE_UPDATE_DATA: { + struct sev_data_receive_update_data *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->guest_address; + desc_list[0].len = data->guest_len; + desc_list[0].guest_owned = true; + break; + } + case SEV_CMD_RECEIVE_UPDATE_VMSA: { + struct sev_data_receive_update_vmsa *data = cmd_buf; + + desc_list[0].paddr_ptr = &data->guest_address; + desc_list[0].len = data->guest_len; + desc_list[0].guest_owned = true; + break; + } + default: + break; + } +} + +static int snp_map_cmd_buf_desc(struct cmd_buf_desc *desc) +{ + 
unsigned int npages; + + if (!desc->len) + return 0; + + /* Allocate a bounce buffer if this isn't a guest owned page. */ + if (!desc->guest_owned) { + struct page *page; + + page = alloc_pages(GFP_KERNEL_ACCOUNT, get_order(desc->len)); + if (!page) { + pr_warn("Failed to allocate bounce buffer for SEV legacy command.\n"); + return -ENOMEM; + } + + desc->paddr_orig = *desc->paddr_ptr; + *desc->paddr_ptr = __psp_pa(page_to_virt(page)); + } + + npages = PAGE_ALIGN(desc->len) >> PAGE_SHIFT; + + /* Transition the buffer to firmware-owned. */ + if (rmp_mark_pages_firmware(*desc->paddr_ptr, npages, true)) { + pr_warn("Error moving pages to firmware-owned state for SEV legacy command.\n"); + return -EFAULT; + } + + return 0; +} + +static int snp_unmap_cmd_buf_desc(struct cmd_buf_desc *desc) +{ + unsigned int npages; + + if (!desc->len) + return 0; + + npages = PAGE_ALIGN(desc->len) >> PAGE_SHIFT; + + /* Transition the buffers back to hypervisor-owned. */ + if (snp_reclaim_pages(*desc->paddr_ptr, npages, true)) { + pr_warn("Failed to reclaim firmware-owned pages while issuing SEV legacy command.\n"); + return -EFAULT; + } + + /* Copy data from bounce buffer and then free it. */ + if (!desc->guest_owned) { + void *bounce_buf = __va(__sme_clr(*desc->paddr_ptr)); + void *dst_buf = __va(__sme_clr(desc->paddr_orig)); + + memcpy(dst_buf, bounce_buf, desc->len); + __free_pages(virt_to_page(bounce_buf), get_order(desc->len)); + + /* Restore the original address in the command buffer. 
*/ + *desc->paddr_ptr = desc->paddr_orig; + } + + return 0; +} + +static int snp_map_cmd_buf_desc_list(int cmd, void *cmd_buf, struct cmd_buf_desc *desc_list) +{ + int i; + + snp_populate_cmd_buf_desc_list(cmd, cmd_buf, desc_list); + + for (i = 0; i < CMD_BUF_DESC_MAX; i++) { + struct cmd_buf_desc *desc = &desc_list[i]; + + if (!desc->paddr_ptr) + break; + + if (snp_map_cmd_buf_desc(desc)) + goto err_unmap; + } + + return 0; + +err_unmap: + for (i--; i >= 0; i--) + snp_unmap_cmd_buf_desc(&desc_list[i]); + + return -EFAULT; +} + +static int snp_unmap_cmd_buf_desc_list(struct cmd_buf_desc *desc_list) +{ + int i, ret = 0; + + for (i = 0; i < CMD_BUF_DESC_MAX; i++) { + struct cmd_buf_desc *desc = &desc_list[i]; + + if (!desc->paddr_ptr) + break; + + if (snp_unmap_cmd_buf_desc(&desc_list[i])) + ret = -EFAULT; + } + + return ret; +} + +static bool sev_cmd_buf_writable(int cmd) +{ + switch (cmd) { + case SEV_CMD_PLATFORM_STATUS: + case SEV_CMD_GUEST_STATUS: + case SEV_CMD_LAUNCH_START: + case SEV_CMD_RECEIVE_START: + case SEV_CMD_LAUNCH_MEASURE: + case SEV_CMD_SEND_START: + case SEV_CMD_SEND_UPDATE_DATA: + case SEV_CMD_SEND_UPDATE_VMSA: + case SEV_CMD_PEK_CSR: + case SEV_CMD_PDH_CERT_EXPORT: + case SEV_CMD_GET_ID: + case SEV_CMD_ATTESTATION_REPORT: + return true; + default: + return false; + } +} + +/* After SNP is INIT'ed, the behavior of legacy SEV commands is changed. */ +static bool snp_legacy_handling_needed(int cmd) +{ + struct sev_device *sev = psp_master->sev_data; + + return cmd < SEV_CMD_SNP_INIT && sev->snp_initialized; +} + +static int snp_prep_cmd_buf(int cmd, void *cmd_buf, struct cmd_buf_desc *desc_list) +{ + if (!snp_legacy_handling_needed(cmd)) + return 0; + + if (snp_map_cmd_buf_desc_list(cmd, cmd_buf, desc_list)) + return -EFAULT; + + /* + * Before command execution, the command buffer needs to be put into + * the firmware-owned state. 
+ */ + if (sev_cmd_buf_writable(cmd)) { + if (rmp_mark_pages_firmware(__pa(cmd_buf), 1, true)) + return -EFAULT; + } + + return 0; +} + +static int snp_reclaim_cmd_buf(int cmd, void *cmd_buf) +{ + if (!snp_legacy_handling_needed(cmd)) + return 0; + + /* + * After command completion, the command buffer needs to be put back + * into the hypervisor-owned state. + */ + if (sev_cmd_buf_writable(cmd)) + if (snp_reclaim_pages(__pa(cmd_buf), 1, true)) + return -EFAULT; + + return 0; +} + static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret) { + struct cmd_buf_desc desc_list[CMD_BUF_DESC_MAX] = {0}; struct psp_device *psp = psp_master; struct sev_device *sev; unsigned int cmdbuff_hi, cmdbuff_lo; unsigned int phys_lsb, phys_msb; unsigned int reg, ret = 0; + void *cmd_buf; int buf_len; if (!psp || !psp->sev_data) @@ -331,12 +857,47 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret) * work for some memory, e.g. vmalloc'd addresses, and @data may not be * physically contiguous. */ - if (data) - memcpy(sev->cmd_buf, data, buf_len); + if (data) { + /* + * Commands are generally issued one at a time and require the + * sev_cmd_mutex, but there could be recursive firmware requests + * due to SEV_CMD_SNP_PAGE_RECLAIM needing to be issued while + * preparing buffers for another command. This is the only known + * case of nesting in the current code, so exactly one + * additional command buffer is available for that purpose. + */ + if (!sev->cmd_buf_active) { + cmd_buf = sev->cmd_buf; + sev->cmd_buf_active = true; + } else if (!sev->cmd_buf_backup_active) { + cmd_buf = sev->cmd_buf_backup; + sev->cmd_buf_backup_active = true; + } else { + dev_err(sev->dev, + "SEV: too many firmware commands in progress, no command buffers available.\n"); + return -EBUSY; + } + + memcpy(cmd_buf, data, buf_len); + + /* + * The behavior of the SEV-legacy commands is altered when the + * SNP firmware is in the INIT state. 
+ */ + ret = snp_prep_cmd_buf(cmd, cmd_buf, desc_list); + if (ret) { + dev_err(sev->dev, + "SEV: failed to prepare buffer for legacy command 0x%x. Error: %d\n", + cmd, ret); + return ret; + } + } else { + cmd_buf = sev->cmd_buf; + } /* Get the physical address of the command buffer */ - phys_lsb = data ? lower_32_bits(__psp_pa(sev->cmd_buf)) : 0; - phys_msb = data ? upper_32_bits(__psp_pa(sev->cmd_buf)) : 0; + phys_lsb = data ? lower_32_bits(__psp_pa(cmd_buf)) : 0; + phys_msb = data ? upper_32_bits(__psp_pa(cmd_buf)) : 0; dev_dbg(sev->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n", cmd, phys_msb, phys_lsb, psp_timeout); @@ -390,20 +951,41 @@ static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret) ret = sev_write_init_ex_file_if_required(cmd); } - print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data, - buf_len, false); - /* * Copy potential output from the PSP back to data. Do this even on * failure in case the caller wants to glean something from the error. */ - if (data) - memcpy(data, sev->cmd_buf, buf_len); + if (data) { + int ret_reclaim; + /* + * Restore the page state after the command completes. + */ + ret_reclaim = snp_reclaim_cmd_buf(cmd, cmd_buf); + if (ret_reclaim) { + dev_err(sev->dev, + "SEV: failed to reclaim buffer for legacy command %#x. 
Error: %d\n", + cmd, ret_reclaim); + return ret_reclaim; + } + + memcpy(data, cmd_buf, buf_len); + + if (sev->cmd_buf_backup_active) + sev->cmd_buf_backup_active = false; + else + sev->cmd_buf_active = false; + + if (snp_unmap_cmd_buf_desc_list(desc_list)) + return -EFAULT; + } + + print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data, + buf_len, false); return ret; } -static int sev_do_cmd(int cmd, void *data, int *psp_ret) +int sev_do_cmd(int cmd, void *data, int *psp_ret) { int rc; @@ -413,6 +995,7 @@ static int sev_do_cmd(int cmd, void *data, int *psp_ret) return rc; } +EXPORT_SYMBOL_GPL(sev_do_cmd); static int __sev_init_locked(int *error) { @@ -427,7 +1010,7 @@ static int __sev_init_locked(int *error) data.tmr_address = __pa(sev_es_tmr); data.flags |= SEV_INIT_FLAGS_SEV_ES; - data.tmr_len = SEV_ES_TMR_SIZE; + data.tmr_len = sev_es_tmr_size; } return __sev_do_cmd_locked(SEV_CMD_INIT, &data, error); @@ -450,7 +1033,7 @@ static int __sev_init_ex_locked(int *error) data.tmr_address = __pa(sev_es_tmr); data.flags |= SEV_INIT_FLAGS_SEV_ES; - data.tmr_len = SEV_ES_TMR_SIZE; + data.tmr_len = sev_es_tmr_size; } return __sev_do_cmd_locked(SEV_CMD_INIT_EX, &data, error); @@ -464,26 +1047,218 @@ static inline int __sev_do_init_locked(int *psp_ret) return __sev_init_locked(psp_ret); } -static int __sev_platform_init_locked(int *error) +static void snp_set_hsave_pa(void *arg) +{ + wrmsrl(MSR_VM_HSAVE_PA, 0); +} + +static int snp_filter_reserved_mem_regions(struct resource *rs, void *arg) +{ + struct sev_data_range_list *range_list = arg; + struct sev_data_range *range = &range_list->ranges[range_list->num_elements]; + size_t size; + + /* + * Ensure the list of HV_FIXED pages that will be passed to firmware + * do not exceed the page-sized argument buffer. 
+ */ + if ((range_list->num_elements * sizeof(struct sev_data_range) + + sizeof(struct sev_data_range_list)) > PAGE_SIZE) + return -E2BIG; + + switch (rs->desc) { + case E820_TYPE_RESERVED: + case E820_TYPE_PMEM: + case E820_TYPE_ACPI: + range->base = rs->start & PAGE_MASK; + size = PAGE_ALIGN((rs->end + 1) - rs->start); + range->page_count = size >> PAGE_SHIFT; + range_list->num_elements++; + break; + default: + break; + } + + return 0; +} + +static int __sev_snp_init_locked(int *error) { - int rc = 0, psp_ret = SEV_RET_NO_FW_CALL; struct psp_device *psp = psp_master; + struct sev_data_snp_init_ex data; struct sev_device *sev; + void *arg = &data; + int cmd, rc = 0; - if (!psp || !psp->sev_data) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; sev = psp->sev_data; - if (sev->state == SEV_STATE_INIT) + if (sev->snp_initialized) return 0; - if (sev_init_ex_buffer) { - rc = sev_read_init_ex_file(); - if (rc) + if (!sev_version_greater_or_equal(SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR)) { + dev_dbg(sev->dev, "SEV-SNP support requires firmware version >= %d:%d\n", + SNP_MIN_API_MAJOR, SNP_MIN_API_MINOR); + return 0; + } + + /* SNP_INIT requires MSR_VM_HSAVE_PA to be cleared on all CPUs. */ + on_each_cpu(snp_set_hsave_pa, NULL, 1); + + /* + * Starting in SNP firmware v1.52, the SNP_INIT_EX command takes a list + * of system physical address ranges to convert into HV-fixed page + * states during the RMP initialization. For instance, the memory that + * UEFI reserves should be included in the that list. This allows system + * components that occasionally write to memory (e.g. logging to UEFI + * reserved regions) to not fail due to RMP initialization and SNP + * enablement. + * + */ + if (sev_version_greater_or_equal(SNP_MIN_API_MAJOR, 52)) { + /* + * Firmware checks that the pages containing the ranges enumerated + * in the RANGES structure are either in the default page state or in the + * firmware page state. 
+ */ + snp_range_list = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!snp_range_list) { + dev_err(sev->dev, + "SEV: SNP_INIT_EX range list memory allocation failed\n"); + return -ENOMEM; + } + + /* + * Retrieve all reserved memory regions from the e820 memory map + * to be setup as HV-fixed pages. + */ + rc = walk_iomem_res_desc(IORES_DESC_NONE, IORESOURCE_MEM, 0, ~0, + snp_range_list, snp_filter_reserved_mem_regions); + if (rc) { + dev_err(sev->dev, + "SEV: SNP_INIT_EX walk_iomem_res_desc failed rc = %d\n", rc); return rc; + } + + memset(&data, 0, sizeof(data)); + data.init_rmp = 1; + data.list_paddr_en = 1; + data.list_paddr = __psp_pa(snp_range_list); + cmd = SEV_CMD_SNP_INIT_EX; + } else { + cmd = SEV_CMD_SNP_INIT; + arg = NULL; } + /* + * The following sequence must be issued before launching the first SNP + * guest to ensure all dirty cache lines are flushed, including from + * updates to the RMP table itself via the RMPUPDATE instruction: + * + * - WBINVD on all running CPUs + * - SEV_CMD_SNP_INIT[_EX] firmware command + * - WBINVD on all running CPUs + * - SEV_CMD_SNP_DF_FLUSH firmware command + */ + wbinvd_on_all_cpus(); + + rc = __sev_do_cmd_locked(cmd, arg, error); + if (rc) + return rc; + + /* Prepare for first SNP guest launch after INIT. 
*/ + wbinvd_on_all_cpus(); + rc = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, error); + if (rc) + return rc; + + sev->snp_initialized = true; + dev_dbg(sev->dev, "SEV-SNP firmware initialized\n"); + + sev_es_tmr_size = SNP_TMR_SIZE; + + return rc; +} + +static void __sev_platform_init_handle_tmr(struct sev_device *sev) +{ + if (sev_es_tmr) + return; + + /* Obtain the TMR memory area for SEV-ES use */ + sev_es_tmr = sev_fw_alloc(sev_es_tmr_size); + if (sev_es_tmr) { + /* Must flush the cache before giving it to the firmware */ + if (!sev->snp_initialized) + clflush_cache_range(sev_es_tmr, sev_es_tmr_size); + } else { + dev_warn(sev->dev, "SEV: TMR allocation failed, SEV-ES support unavailable\n"); + } +} + +/* + * If an init_ex_path is provided allocate a buffer for the file and + * read in the contents. Additionally, if SNP is initialized, convert + * the buffer pages to firmware pages. + */ +static int __sev_platform_init_handle_init_ex_path(struct sev_device *sev) +{ + struct page *page; + int rc; + + if (!init_ex_path) + return 0; + + if (sev_init_ex_buffer) + return 0; + + page = alloc_pages(GFP_KERNEL, get_order(NV_LENGTH)); + if (!page) { + dev_err(sev->dev, "SEV: INIT_EX NV memory allocation failed\n"); + return -ENOMEM; + } + + sev_init_ex_buffer = page_address(page); + + rc = sev_read_init_ex_file(); + if (rc) + return rc; + + /* If SEV-SNP is initialized, transition to firmware page. 
*/ + if (sev->snp_initialized) { + unsigned long npages; + + npages = 1UL << get_order(NV_LENGTH); + if (rmp_mark_pages_firmware(__pa(sev_init_ex_buffer), npages, false)) { + dev_err(sev->dev, "SEV: INIT_EX NV memory page state change failed.\n"); + return -ENOMEM; + } + } + + return 0; +} + +static int __sev_platform_init_locked(int *error) +{ + int rc, psp_ret = SEV_RET_NO_FW_CALL; + struct sev_device *sev; + + if (!psp_master || !psp_master->sev_data) + return -ENODEV; + + sev = psp_master->sev_data; + + if (sev->state == SEV_STATE_INIT) + return 0; + + __sev_platform_init_handle_tmr(sev); + + rc = __sev_platform_init_handle_init_ex_path(sev); + if (rc) + return rc; + rc = __sev_do_init_locked(&psp_ret); if (rc && psp_ret == SEV_RET_SECURE_DATA_INVALID) { /* @@ -520,12 +1295,46 @@ static int __sev_platform_init_locked(int *error) return 0; } -int sev_platform_init(int *error) +static int _sev_platform_init_locked(struct sev_platform_init_args *args) +{ + struct sev_device *sev; + int rc; + + if (!psp_master || !psp_master->sev_data) + return -ENODEV; + + sev = psp_master->sev_data; + + if (sev->state == SEV_STATE_INIT) + return 0; + + /* + * Legacy guests cannot be running while SNP_INIT(_EX) is executing, + * so perform SEV-SNP initialization at probe time. + */ + rc = __sev_snp_init_locked(&args->error); + if (rc && rc != -ENODEV) { + /* + * Don't abort the probe if SNP INIT failed, + * continue to initialize the legacy SEV firmware. + */ + dev_err(sev->dev, "SEV-SNP: failed to INIT rc %d, error %#x\n", + rc, args->error); + } + + /* Defer legacy SEV/SEV-ES support if allowed by caller/module. 
*/ + if (args->probe && !psp_init_on_probe) + return 0; + + return __sev_platform_init_locked(&args->error); +} + +int sev_platform_init(struct sev_platform_init_args *args) { int rc; mutex_lock(&sev_cmd_mutex); - rc = __sev_platform_init_locked(error); + rc = _sev_platform_init_locked(args); mutex_unlock(&sev_cmd_mutex); return rc; @@ -556,17 +1365,6 @@ static int __sev_platform_shutdown_locked(int *error) return ret; } -static int sev_platform_shutdown(int *error) -{ - int rc; - - mutex_lock(&sev_cmd_mutex); - rc = __sev_platform_shutdown_locked(NULL); - mutex_unlock(&sev_cmd_mutex); - - return rc; -} - static int sev_get_platform_state(int *state, int *error) { struct sev_user_data_status data; @@ -842,6 +1640,78 @@ fw_err: return ret; } +static int __sev_snp_shutdown_locked(int *error, bool panic) +{ + struct psp_device *psp = psp_master; + struct sev_device *sev; + struct sev_data_snp_shutdown_ex data; + int ret; + + if (!psp || !psp->sev_data) + return 0; + + sev = psp->sev_data; + + if (!sev->snp_initialized) + return 0; + + memset(&data, 0, sizeof(data)); + data.len = sizeof(data); + data.iommu_snp_shutdown = 1; + + /* + * If invoked during panic handling, local interrupts are disabled + * and all CPUs are stopped, so wbinvd_on_all_cpus() can't be called. + * In that case, a wbinvd() is done on remote CPUs via the NMI + * callback, so only a local wbinvd() is needed here. 
+ */ + if (!panic) + wbinvd_on_all_cpus(); + else + wbinvd(); + + ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, error); + /* SHUTDOWN may require DF_FLUSH */ + if (*error == SEV_RET_DFFLUSH_REQUIRED) { + ret = __sev_do_cmd_locked(SEV_CMD_SNP_DF_FLUSH, NULL, NULL); + if (ret) { + dev_err(sev->dev, "SEV-SNP DF_FLUSH failed\n"); + return ret; + } + /* reissue the shutdown command */ + ret = __sev_do_cmd_locked(SEV_CMD_SNP_SHUTDOWN_EX, &data, + error); + } + if (ret) { + dev_err(sev->dev, "SEV-SNP firmware shutdown failed\n"); + return ret; + } + + /* + * SNP_SHUTDOWN_EX with IOMMU_SNP_SHUTDOWN set to 1 disables SNP + * enforcement by the IOMMU and also transitions all pages + * associated with the IOMMU to the Reclaim state. + * Firmware was transitioning the IOMMU pages to Hypervisor state + * before version 1.53. But, accounting for the number of assigned + * 4kB pages in a 2M page was done incorrectly by not transitioning + * to the Reclaim state. This resulted in RMP #PF when later accessing + * the 2M page containing those pages during kexec boot. Hence, the + * firmware now transitions these pages to Reclaim state and hypervisor + * needs to transition these pages to shared state. SNP Firmware + * version 1.53 and above are needed for kexec boot. 
+ */ + ret = amd_iommu_snp_disable(); + if (ret) { + dev_err(sev->dev, "SNP IOMMU shutdown failed\n"); + return ret; + } + + sev->snp_initialized = false; + dev_dbg(sev->dev, "SEV-SNP firmware shutdown\n"); + + return ret; +} + static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable) { struct sev_device *sev = psp_master->sev_data; @@ -1084,6 +1954,85 @@ e_free_pdh: return ret; } +static int sev_ioctl_do_snp_platform_status(struct sev_issue_cmd *argp) +{ + struct sev_device *sev = psp_master->sev_data; + struct sev_data_snp_addr buf; + struct page *status_page; + void *data; + int ret; + + if (!sev->snp_initialized || !argp->data) + return -EINVAL; + + status_page = alloc_page(GFP_KERNEL_ACCOUNT); + if (!status_page) + return -ENOMEM; + + data = page_address(status_page); + + /* + * Firmware expects status page to be in firmware-owned state, otherwise + * it will report firmware error code INVALID_PAGE_STATE (0x1A). + */ + if (rmp_mark_pages_firmware(__pa(data), 1, true)) { + ret = -EFAULT; + goto cleanup; + } + + buf.address = __psp_pa(data); + ret = __sev_do_cmd_locked(SEV_CMD_SNP_PLATFORM_STATUS, &buf, &argp->error); + + /* + * Status page will be transitioned to Reclaim state upon success, or + * left in Firmware state in failure. Use snp_reclaim_pages() to + * transition either case back to Hypervisor-owned state. 
+ */ + if (snp_reclaim_pages(__pa(data), 1, true)) + return -EFAULT; + + if (ret) + goto cleanup; + + if (copy_to_user((void __user *)argp->data, data, + sizeof(struct sev_user_data_snp_status))) + ret = -EFAULT; + +cleanup: + __free_pages(status_page, 0); + return ret; +} + +static int sev_ioctl_do_snp_commit(struct sev_issue_cmd *argp) +{ + struct sev_device *sev = psp_master->sev_data; + struct sev_data_snp_commit buf; + + if (!sev->snp_initialized) + return -EINVAL; + + buf.len = sizeof(buf); + + return __sev_do_cmd_locked(SEV_CMD_SNP_COMMIT, &buf, &argp->error); +} + +static int sev_ioctl_do_snp_set_config(struct sev_issue_cmd *argp, bool writable) +{ + struct sev_device *sev = psp_master->sev_data; + struct sev_user_data_snp_config config; + + if (!sev->snp_initialized || !argp->data) + return -EINVAL; + + if (!writable) + return -EPERM; + + if (copy_from_user(&config, (void __user *)argp->data, sizeof(config))) + return -EFAULT; + + return __sev_do_cmd_locked(SEV_CMD_SNP_CONFIG, &config, &argp->error); +} + static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) { void __user *argp = (void __user *)arg; @@ -1135,6 +2084,15 @@ static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) case SEV_GET_ID2: ret = sev_ioctl_do_get_id2(&input); break; + case SNP_PLATFORM_STATUS: + ret = sev_ioctl_do_snp_platform_status(&input); + break; + case SNP_COMMIT: + ret = sev_ioctl_do_snp_commit(&input); + break; + case SNP_SET_CONFIG: + ret = sev_ioctl_do_snp_set_config(&input, writable); + break; default: ret = -EINVAL; goto out; @@ -1245,10 +2203,12 @@ int sev_dev_init(struct psp_device *psp) if (!sev) goto e_err; - sev->cmd_buf = (void *)devm_get_free_pages(dev, GFP_KERNEL, 0); + sev->cmd_buf = (void *)devm_get_free_pages(dev, GFP_KERNEL, 1); if (!sev->cmd_buf) goto e_sev; + sev->cmd_buf_backup = (uint8_t *)sev->cmd_buf + PAGE_SIZE; + psp->sev_data = sev; sev->dev = dev; @@ -1287,24 +2247,51 @@ e_err: return ret; } -static 
void sev_firmware_shutdown(struct sev_device *sev) +static void __sev_firmware_shutdown(struct sev_device *sev, bool panic) { - sev_platform_shutdown(NULL); + int error; + + __sev_platform_shutdown_locked(NULL); if (sev_es_tmr) { - /* The TMR area was encrypted, flush it from the cache */ - wbinvd_on_all_cpus(); + /* + * The TMR area was encrypted, flush it from the cache. + * + * If invoked during panic handling, local interrupts are + * disabled and all CPUs are stopped, so wbinvd_on_all_cpus() + * can't be used. In that case, wbinvd() is done on remote CPUs + * via the NMI callback, and done for this CPU later during + * SNP shutdown, so wbinvd_on_all_cpus() can be skipped. + */ + if (!panic) + wbinvd_on_all_cpus(); - free_pages((unsigned long)sev_es_tmr, - get_order(SEV_ES_TMR_SIZE)); + __snp_free_firmware_pages(virt_to_page(sev_es_tmr), + get_order(sev_es_tmr_size), + true); sev_es_tmr = NULL; } if (sev_init_ex_buffer) { - free_pages((unsigned long)sev_init_ex_buffer, - get_order(NV_LENGTH)); + __snp_free_firmware_pages(virt_to_page(sev_init_ex_buffer), + get_order(NV_LENGTH), + true); sev_init_ex_buffer = NULL; } + + if (snp_range_list) { + kfree(snp_range_list); + snp_range_list = NULL; + } + + __sev_snp_shutdown_locked(&error, panic); +} + +static void sev_firmware_shutdown(struct sev_device *sev) +{ + mutex_lock(&sev_cmd_mutex); + __sev_firmware_shutdown(sev, false); + mutex_unlock(&sev_cmd_mutex); } void sev_dev_destroy(struct psp_device *psp) @@ -1322,6 +2309,29 @@ void sev_dev_destroy(struct psp_device *psp) psp_clear_sev_irq_handler(psp); } +static int snp_shutdown_on_panic(struct notifier_block *nb, + unsigned long reason, void *arg) +{ + struct sev_device *sev = psp_master->sev_data; + + /* + * If sev_cmd_mutex is already acquired, then it's likely + * another PSP command is in flight and issuing a shutdown + * would fail in unexpected ways. Rather than create even + * more confusion during a panic, just bail out here. 
+ */ + if (mutex_is_locked(&sev_cmd_mutex)) + return NOTIFY_DONE; + + __sev_firmware_shutdown(sev, true); + + return NOTIFY_DONE; +} + +static struct notifier_block snp_panic_notifier = { + .notifier_call = snp_shutdown_on_panic, +}; + int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd, void *data, int *error) { @@ -1335,7 +2345,8 @@ EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user); void sev_pci_init(void) { struct sev_device *sev = psp_master->sev_data; - int error, rc; + struct sev_platform_init_args args = {0}; + int rc; if (!sev) return; @@ -1348,36 +2359,18 @@ void sev_pci_init(void) if (sev_update_firmware(sev->dev) == 0) sev_get_api_version(); - /* If an init_ex_path is provided rely on INIT_EX for PSP initialization - * instead of INIT. - */ - if (init_ex_path) { - sev_init_ex_buffer = sev_fw_alloc(NV_LENGTH); - if (!sev_init_ex_buffer) { - dev_err(sev->dev, - "SEV: INIT_EX NV memory allocation failed\n"); - goto err; - } - } - - /* Obtain the TMR memory area for SEV-ES use */ - sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE); - if (sev_es_tmr) - /* Must flush the cache before giving it to the firmware */ - clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE); - else - dev_warn(sev->dev, - "SEV: TMR allocation failed, SEV-ES support unavailable\n"); - - if (!psp_init_on_probe) - return; - /* Initialize the platform */ - rc = sev_platform_init(&error); + args.probe = true; + rc = sev_platform_init(&args); if (rc) dev_err(sev->dev, "SEV: failed to INIT error %#x, rc %d\n", - error, rc); + args.error, rc); + dev_info(sev->dev, "SEV%s API:%d.%d build:%d\n", sev->snp_initialized ? 
+ "-SNP" : "", sev->api_major, sev->api_minor, sev->build); + + atomic_notifier_chain_register(&panic_notifier_list, + &snp_panic_notifier); return; err: @@ -1392,4 +2385,7 @@ void sev_pci_exit(void) return; sev_firmware_shutdown(sev); + + atomic_notifier_chain_unregister(&panic_notifier_list, + &snp_panic_notifier); } diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h index 778c95155e..3e4e5574e8 100644 --- a/drivers/crypto/ccp/sev-dev.h +++ b/drivers/crypto/ccp/sev-dev.h @@ -52,6 +52,11 @@ struct sev_device { u8 build; void *cmd_buf; + void *cmd_buf_backup; + bool cmd_buf_active; + bool cmd_buf_backup_active; + + bool snp_initialized; }; int sev_dev_init(struct psp_device *psp); diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c index 80ed4b2d20..1b9b7bccde 100644 --- a/drivers/crypto/hisilicon/debugfs.c +++ b/drivers/crypto/hisilicon/debugfs.c @@ -13,6 +13,7 @@ #define QM_DFX_COMMON_LEN 0xC3 #define QM_DFX_REGS_LEN 4UL #define QM_DBG_TMP_BUF_LEN 22 +#define QM_XQC_ADDR_MASK GENMASK(31, 0) #define CURRENT_FUN_MASK GENMASK(5, 0) #define CURRENT_Q_MASK GENMASK(31, 16) #define QM_SQE_ADDR_MASK GENMASK(7, 0) @@ -24,6 +25,8 @@ #define QM_DFX_QN_SHIFT 16 #define QM_DFX_CNT_CLR_CE 0x100118 #define QM_DBG_WRITE_LEN 1024 +#define QM_IN_IDLE_ST_REG 0x1040e4 +#define QM_IN_IDLE_STATE 0x1 static const char * const qm_debug_file_name[] = { [CURRENT_QM] = "current_qm", @@ -81,6 +84,30 @@ static const struct debugfs_reg32 qm_dfx_regs[] = { {"QM_DFX_FF_ST5 ", 0x1040dc}, {"QM_DFX_FF_ST6 ", 0x1040e0}, {"QM_IN_IDLE_ST ", 0x1040e4}, + {"QM_CACHE_CTL ", 0x100050}, + {"QM_TIMEOUT_CFG ", 0x100070}, + {"QM_DB_TIMEOUT_CFG ", 0x100074}, + {"QM_FLR_PENDING_TIME_CFG ", 0x100078}, + {"QM_ARUSR_MCFG1 ", 0x100088}, + {"QM_AWUSR_MCFG1 ", 0x100098}, + {"QM_AXI_M_CFG_ENABLE ", 0x1000B0}, + {"QM_RAS_CE_THRESHOLD ", 0x1000F8}, + {"QM_AXI_TIMEOUT_CTRL ", 0x100120}, + {"QM_AXI_TIMEOUT_STATUS ", 0x100124}, + {"QM_CQE_AGGR_TIMEOUT_CTRL ", 
0x100144}, + {"ACC_RAS_MSI_INT_SEL ", 0x1040fc}, + {"QM_CQE_OUT ", 0x104100}, + {"QM_EQE_OUT ", 0x104104}, + {"QM_AEQE_OUT ", 0x104108}, + {"QM_DB_INFO0 ", 0x104180}, + {"QM_DB_INFO1 ", 0x104184}, + {"QM_AM_CTRL_GLOBAL ", 0x300000}, + {"QM_AM_CURR_PORT_STS ", 0x300100}, + {"QM_AM_CURR_TRANS_RETURN ", 0x300150}, + {"QM_AM_CURR_RD_MAX_TXID ", 0x300154}, + {"QM_AM_CURR_WR_MAX_TXID ", 0x300158}, + {"QM_AM_ALARM_RRESP ", 0x300180}, + {"QM_AM_ALARM_BRESP ", 0x300184}, }; static const struct debugfs_reg32 qm_vf_dfx_regs[] = { @@ -141,7 +168,6 @@ static void dump_show(struct hisi_qm *qm, void *info, static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - struct qm_sqc *sqc_curr; struct qm_sqc sqc; u32 qp_id; int ret; @@ -157,6 +183,8 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1); if (!ret) { + sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, &sqc, sizeof(struct qm_sqc), name); return 0; @@ -164,9 +192,10 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) down_read(&qm->qps_lock); if (qm->sqc) { - sqc_curr = qm->sqc + qp_id; - - dump_show(qm, sqc_curr, sizeof(*sqc_curr), "SOFT SQC"); + memcpy(&sqc, qm->sqc + qp_id * sizeof(struct qm_sqc), sizeof(struct qm_sqc)); + sqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + sqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + dump_show(qm, &sqc, sizeof(struct qm_sqc), "SOFT SQC"); } up_read(&qm->qps_lock); @@ -176,7 +205,6 @@ static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) { struct device *dev = &qm->pdev->dev; - struct qm_cqc *cqc_curr; struct qm_cqc cqc; u32 qp_id; int ret; @@ -192,6 +220,8 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1); if (!ret) { + cqc.base_h = 
cpu_to_le32(QM_XQC_ADDR_MASK); + cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, &cqc, sizeof(struct qm_cqc), name); return 0; @@ -199,9 +229,10 @@ static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) down_read(&qm->qps_lock); if (qm->cqc) { - cqc_curr = qm->cqc + qp_id; - - dump_show(qm, cqc_curr, sizeof(*cqc_curr), "SOFT CQC"); + memcpy(&cqc, qm->cqc + qp_id * sizeof(struct qm_cqc), sizeof(struct qm_cqc)); + cqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + cqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + dump_show(qm, &cqc, sizeof(struct qm_cqc), "SOFT CQC"); } up_read(&qm->qps_lock); @@ -237,6 +268,10 @@ static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name) if (ret) return ret; + aeqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + aeqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); + eqc.base_h = cpu_to_le32(QM_XQC_ADDR_MASK); + eqc.base_l = cpu_to_le32(QM_XQC_ADDR_MASK); dump_show(qm, xeqc, size, name); return ret; @@ -284,27 +319,26 @@ static int q_dump_param_parse(struct hisi_qm *qm, char *s, static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name) { - u16 sq_depth = qm->qp_array->cq_depth; - void *sqe, *sqe_curr; + u16 sq_depth = qm->qp_array->sq_depth; struct hisi_qp *qp; u32 qp_id, sqe_id; + void *sqe; int ret; ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth); if (ret) return ret; - sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL); + sqe = kzalloc(qm->sqe_size, GFP_KERNEL); if (!sqe) return -ENOMEM; qp = &qm->qp_array[qp_id]; - memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth); - sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size); - memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, + memcpy(sqe, qp->sqe + sqe_id * qm->sqe_size, qm->sqe_size); + memset(sqe + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, qm->debug.sqe_mask_len); - dump_show(qm, sqe_curr, qm->sqe_size, name); + dump_show(qm, sqe, qm->sqe_size, name); kfree(sqe); @@ -783,8 +817,14 @@ static void dfx_regs_uninit(struct hisi_qm *qm, { int i; + if (!dregs) + 
return; + /* Setting the pointer is NULL to prevent double free */ for (i = 0; i < reg_len; i++) { + if (!dregs[i].regs) + continue; + kfree(dregs[i].regs); dregs[i].regs = NULL; } @@ -834,14 +874,21 @@ alloc_error: static int qm_diff_regs_init(struct hisi_qm *qm, struct dfx_diff_registers *dregs, u32 reg_len) { + int ret; + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); - if (IS_ERR(qm->debug.qm_diff_regs)) - return PTR_ERR(qm->debug.qm_diff_regs); + if (IS_ERR(qm->debug.qm_diff_regs)) { + ret = PTR_ERR(qm->debug.qm_diff_regs); + qm->debug.qm_diff_regs = NULL; + return ret; + } qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); if (IS_ERR(qm->debug.acc_diff_regs)) { dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); - return PTR_ERR(qm->debug.acc_diff_regs); + ret = PTR_ERR(qm->debug.acc_diff_regs); + qm->debug.acc_diff_regs = NULL; + return ret; } return 0; @@ -882,7 +929,9 @@ static int qm_last_regs_init(struct hisi_qm *qm) static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len) { dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); + qm->debug.acc_diff_regs = NULL; dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); + qm->debug.qm_diff_regs = NULL; } /** @@ -1001,6 +1050,30 @@ static int qm_diff_regs_show(struct seq_file *s, void *unused) } DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); +static int qm_state_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + u32 val; + int ret; + + /* If device is in suspended, directly return the idle state. 
*/ + ret = hisi_qm_get_dfx_access(qm); + if (!ret) { + val = readl(qm->io_base + QM_IN_IDLE_ST_REG); + hisi_qm_put_dfx_access(qm); + } else if (ret == -EAGAIN) { + val = QM_IN_IDLE_STATE; + } else { + return ret; + } + + seq_printf(s, "%u\n", val); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(qm_state); + static ssize_t qm_status_read(struct file *filp, char __user *buffer, size_t count, loff_t *pos) { @@ -1025,12 +1098,12 @@ static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, { struct debugfs_file *file = qm->debug.files + index; - debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, - &qm_debug_fops); - file->index = index; mutex_init(&file->lock); file->debug = &qm->debug; + + debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, + &qm_debug_fops); } static int qm_debugfs_atomic64_set(void *data, u64 val) @@ -1062,6 +1135,7 @@ DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get, void hisi_qm_debug_init(struct hisi_qm *qm) { struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; + struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx; struct qm_dfx *dfx = &qm->debug.dfx; struct dentry *qm_d; void *data; @@ -1072,6 +1146,9 @@ void hisi_qm_debug_init(struct hisi_qm *qm) /* only show this in PF */ if (qm->fun_type == QM_HW_PF) { + debugfs_create_file("qm_state", 0444, qm->debug.qm_d, + qm, &qm_state_fops); + qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM); for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) qm_create_debugfs_file(qm, qm->debug.qm_d, i); @@ -1087,6 +1164,10 @@ void hisi_qm_debug_init(struct hisi_qm *qm) debugfs_create_file("status", 0444, qm->debug.qm_d, qm, &qm_status_fops); + + debugfs_create_u32("dev_state", 0444, qm->debug.qm_d, &dev_dfx->dev_state); + debugfs_create_u32("dev_timeout", 0644, qm->debug.qm_d, &dev_dfx->dev_timeout); + for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) { data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset); debugfs_create_file(qm_dfx_files[i].name, 
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 3255b2a070..10aa4da933 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -106,7 +106,7 @@ #define HPRE_SHAPER_TYPE_RATE 640 #define HPRE_VIA_MSI_DSM 1 #define HPRE_SQE_MASK_OFFSET 8 -#define HPRE_SQE_MASK_LEN 24 +#define HPRE_SQE_MASK_LEN 44 #define HPRE_CTX_Q_NUM_DEF 1 #define HPRE_DFX_BASE 0x301000 @@ -440,7 +440,7 @@ MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)"); struct hisi_qp *hpre_create_qp(u8 type) { - int node = cpu_to_node(smp_processor_id()); + int node = cpu_to_node(raw_smp_processor_id()); struct hisi_qp *qp = NULL; int ret; @@ -1074,41 +1074,40 @@ static int hpre_debugfs_init(struct hisi_qm *qm) struct device *dev = &qm->pdev->dev; int ret; - qm->debug.debug_root = debugfs_create_dir(dev_name(dev), - hpre_debugfs_root); - - qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; ret = hisi_qm_regs_debugfs_init(qm, hpre_diff_regs, ARRAY_SIZE(hpre_diff_regs)); if (ret) { dev_warn(dev, "Failed to init HPRE diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + hpre_debugfs_root); + qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; + hisi_qm_debug_init(qm); if (qm->pdev->device == PCI_DEVICE_ID_HUAWEI_HPRE_PF) { ret = hpre_ctrl_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; } hpre_dfx_debug_init(qm); return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); debugfs_remove: debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); return ret; } static void hpre_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + + 
hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); } static int hpre_pre_store_cap_reg(struct hisi_qm *qm) diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 4b20b94e63..3dac8d8e85 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -236,6 +236,12 @@ #define QM_DEV_ALG_MAX_LEN 256 + /* abnormal status value for stopping queue */ +#define QM_STOP_QUEUE_FAIL 1 +#define QM_DUMP_SQC_FAIL 3 +#define QM_DUMP_CQC_FAIL 4 +#define QM_FINISH_WAIT 5 + #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ (((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \ ((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT) | \ @@ -312,6 +318,7 @@ static const struct hisi_qm_cap_info qm_cap_info_comm[] = { {QM_SUPPORT_DB_ISOLATION, 0x30, 0, BIT(0), 0x0, 0x0, 0x0}, {QM_SUPPORT_FUNC_QOS, 0x3100, 0, BIT(8), 0x0, 0x0, 0x1}, {QM_SUPPORT_STOP_QP, 0x3100, 0, BIT(9), 0x0, 0x0, 0x1}, + {QM_SUPPORT_STOP_FUNC, 0x3100, 0, BIT(10), 0x0, 0x0, 0x1}, {QM_SUPPORT_MB_COMMAND, 0x3100, 0, BIT(11), 0x0, 0x0, 0x1}, {QM_SUPPORT_SVA_PREFETCH, 0x3100, 0, BIT(14), 0x0, 0x0, 0x1}, }; @@ -638,6 +645,9 @@ int qm_set_and_get_xqc(struct hisi_qm *qm, u8 cmd, void *xqc, u32 qp_id, bool op tmp_xqc = qm->xqc_buf.aeqc; xqc_dma = qm->xqc_buf.aeqc_dma; break; + default: + dev_err(&qm->pdev->dev, "unknown mailbox cmd %u\n", cmd); + return -EINVAL; } /* Setting xqc will fail if master OOO is blocked. */ @@ -1674,6 +1684,11 @@ unlock: return ret; } +static int qm_drain_qm(struct hisi_qm *qm) +{ + return hisi_qm_mb(qm, QM_MB_CMD_FLUSH_QM, 0, 0, 0); +} + static int qm_stop_qp(struct hisi_qp *qp) { return hisi_qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0); @@ -2031,43 +2046,25 @@ static void qp_stop_fail_cb(struct hisi_qp *qp) } } -/** - * qm_drain_qp() - Drain a qp. - * @qp: The qp we want to drain. - * - * Determine whether the queue is cleared by judging the tail pointers of - * sq and cq. 
- */ -static int qm_drain_qp(struct hisi_qp *qp) +static int qm_wait_qp_empty(struct hisi_qm *qm, u32 *state, u32 qp_id) { - struct hisi_qm *qm = qp->qm; struct device *dev = &qm->pdev->dev; struct qm_sqc sqc; struct qm_cqc cqc; int ret, i = 0; - /* No need to judge if master OOO is blocked. */ - if (qm_check_dev_error(qm)) - return 0; - - /* Kunpeng930 supports drain qp by device */ - if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) { - ret = qm_stop_qp(qp); - if (ret) - dev_err(dev, "Failed to stop qp(%u)!\n", qp->qp_id); - return ret; - } - while (++i) { - ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp->qp_id, 1); + ret = qm_set_and_get_xqc(qm, QM_MB_CMD_SQC, &sqc, qp_id, 1); if (ret) { dev_err_ratelimited(dev, "Failed to dump sqc!\n"); + *state = QM_DUMP_SQC_FAIL; return ret; } - ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp->qp_id, 1); + ret = qm_set_and_get_xqc(qm, QM_MB_CMD_CQC, &cqc, qp_id, 1); if (ret) { dev_err_ratelimited(dev, "Failed to dump cqc!\n"); + *state = QM_DUMP_CQC_FAIL; return ret; } @@ -2076,8 +2073,9 @@ static int qm_drain_qp(struct hisi_qp *qp) break; if (i == MAX_WAIT_COUNTS) { - dev_err(dev, "Fail to empty queue %u!\n", qp->qp_id); - return -EBUSY; + dev_err(dev, "Fail to empty queue %u!\n", qp_id); + *state = QM_STOP_QUEUE_FAIL; + return -ETIMEDOUT; } usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX); @@ -2086,9 +2084,53 @@ static int qm_drain_qp(struct hisi_qp *qp) return 0; } -static int qm_stop_qp_nolock(struct hisi_qp *qp) +/** + * qm_drain_qp() - Drain a qp. + * @qp: The qp we want to drain. + * + * If the device does not support stopping queue by sending mailbox, + * determine whether the queue is cleared by judging the tail pointers of + * sq and cq. + */ +static int qm_drain_qp(struct hisi_qp *qp) +{ + struct hisi_qm *qm = qp->qm; + struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev)); + u32 state = 0; + int ret; + + /* No need to judge if master OOO is blocked. 
*/ + if (qm_check_dev_error(pf_qm)) + return 0; + + /* HW V3 supports drain qp by device */ + if (test_bit(QM_SUPPORT_STOP_QP, &qm->caps)) { + ret = qm_stop_qp(qp); + if (ret) { + dev_err(&qm->pdev->dev, "Failed to stop qp!\n"); + state = QM_STOP_QUEUE_FAIL; + goto set_dev_state; + } + return ret; + } + + ret = qm_wait_qp_empty(qm, &state, qp->qp_id); + if (ret) + goto set_dev_state; + + return 0; + +set_dev_state: + if (qm->debug.dev_dfx.dev_timeout) + qm->debug.dev_dfx.dev_state = state; + + return ret; +} + +static void qm_stop_qp_nolock(struct hisi_qp *qp) { - struct device *dev = &qp->qm->pdev->dev; + struct hisi_qm *qm = qp->qm; + struct device *dev = &qm->pdev->dev; int ret; /* @@ -2099,39 +2141,36 @@ static int qm_stop_qp_nolock(struct hisi_qp *qp) */ if (atomic_read(&qp->qp_status.flags) != QP_START) { qp->is_resetting = false; - return 0; + return; } atomic_set(&qp->qp_status.flags, QP_STOP); - ret = qm_drain_qp(qp); - if (ret) - dev_err(dev, "Failed to drain out data for stopping!\n"); + /* V3 supports direct stop function when FLR prepare */ + if (qm->ver < QM_HW_V3 || qm->status.stop_reason == QM_NORMAL) { + ret = qm_drain_qp(qp); + if (ret) + dev_err(dev, "Failed to drain out data for stopping qp(%u)!\n", qp->qp_id); + } - flush_workqueue(qp->qm->wq); + flush_workqueue(qm->wq); if (unlikely(qp->is_resetting && atomic_read(&qp->qp_status.used))) qp_stop_fail_cb(qp); dev_dbg(dev, "stop queue %u!", qp->qp_id); - - return 0; } /** * hisi_qm_stop_qp() - Stop a qp in qm. * @qp: The qp we want to stop. * - * This function is reverse of hisi_qm_start_qp. Return 0 if successful. + * This function is reverse of hisi_qm_start_qp. 
*/ -int hisi_qm_stop_qp(struct hisi_qp *qp) +void hisi_qm_stop_qp(struct hisi_qp *qp) { - int ret; - down_write(&qp->qm->qps_lock); - ret = qm_stop_qp_nolock(qp); + qm_stop_qp_nolock(qp); up_write(&qp->qm->qps_lock); - - return ret; } EXPORT_SYMBOL_GPL(hisi_qm_stop_qp); @@ -2309,7 +2348,31 @@ static int hisi_qm_uacce_start_queue(struct uacce_queue *q) static void hisi_qm_uacce_stop_queue(struct uacce_queue *q) { - hisi_qm_stop_qp(q->priv); + struct hisi_qp *qp = q->priv; + struct hisi_qm *qm = qp->qm; + struct qm_dev_dfx *dev_dfx = &qm->debug.dev_dfx; + u32 i = 0; + + hisi_qm_stop_qp(qp); + + if (!dev_dfx->dev_timeout || !dev_dfx->dev_state) + return; + + /* + * After the queue fails to be stopped, + * wait for a period of time before releasing the queue. + */ + while (++i) { + msleep(WAIT_PERIOD); + + /* Since dev_timeout maybe modified, check i >= dev_timeout */ + if (i >= dev_dfx->dev_timeout) { + dev_err(&qm->pdev->dev, "Stop q %u timeout, state %u\n", + qp->qp_id, dev_dfx->dev_state); + dev_dfx->dev_state = QM_FINISH_WAIT; + break; + } + } } static int hisi_qm_is_q_updated(struct uacce_queue *q) @@ -2833,12 +2896,9 @@ void hisi_qm_uninit(struct hisi_qm *qm) hisi_qm_set_state(qm, QM_NOT_READY); up_write(&qm->qps_lock); + qm_remove_uacce(qm); qm_irqs_unregister(qm); hisi_qm_pci_uninit(qm); - if (qm->use_sva) { - uacce_remove(qm->uacce); - qm->uacce = NULL; - } } EXPORT_SYMBOL_GPL(hisi_qm_uninit); @@ -3054,25 +3114,18 @@ static int qm_restart(struct hisi_qm *qm) } /* Stop started qps in reset flow */ -static int qm_stop_started_qp(struct hisi_qm *qm) +static void qm_stop_started_qp(struct hisi_qm *qm) { - struct device *dev = &qm->pdev->dev; struct hisi_qp *qp; - int i, ret; + int i; for (i = 0; i < qm->qp_num; i++) { qp = &qm->qp_array[i]; - if (qp && atomic_read(&qp->qp_status.flags) == QP_START) { + if (atomic_read(&qp->qp_status.flags) == QP_START) { qp->is_resetting = true; - ret = qm_stop_qp_nolock(qp); - if (ret < 0) { - dev_err(dev, "Failed to stop 
qp%d!\n", i); - return ret; - } + qm_stop_qp_nolock(qp); } } - - return 0; } /** @@ -3112,21 +3165,31 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r) down_write(&qm->qps_lock); - qm->status.stop_reason = r; if (atomic_read(&qm->status.flags) == QM_STOP) goto err_unlock; /* Stop all the request sending at first. */ atomic_set(&qm->status.flags, QM_STOP); + qm->status.stop_reason = r; - if (qm->status.stop_reason == QM_SOFT_RESET || - qm->status.stop_reason == QM_DOWN) { + if (qm->status.stop_reason != QM_NORMAL) { hisi_qm_set_hw_reset(qm, QM_RESET_STOP_TX_OFFSET); - ret = qm_stop_started_qp(qm); - if (ret < 0) { - dev_err(dev, "Failed to stop started qp!\n"); - goto err_unlock; + /* + * When performing soft reset, the hardware will no longer + * do tasks, and the tasks in the device will be flushed + * out directly since the master ooo is closed. + */ + if (test_bit(QM_SUPPORT_STOP_FUNC, &qm->caps) && + r != QM_SOFT_RESET) { + ret = qm_drain_qm(qm); + if (ret) { + dev_err(dev, "failed to drain qm!\n"); + goto err_unlock; + } } + + qm_stop_started_qp(qm); + hisi_qm_set_hw_reset(qm, QM_RESET_STOP_RX_OFFSET); } @@ -3141,6 +3204,7 @@ int hisi_qm_stop(struct hisi_qm *qm, enum qm_stop_reason r) } qm_clear_queues(qm); + qm->status.stop_reason = QM_NORMAL; err_unlock: up_write(&qm->qps_lock); diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index f028dcfd0e..0558f98e22 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -118,7 +118,7 @@ struct sec_aead { }; /* Get an en/de-cipher queue cyclically to balance load over queues of TFM */ -static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req) +static inline u32 sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req) { if (req->c_req.encrypt) return (u32)atomic_inc_return(&ctx->enc_qcyclic) % @@ -481,12 +481,13 @@ static void sec_alg_resource_free(struct sec_ctx *ctx, if 
(ctx->pbuf_supported) sec_free_pbuf_resource(dev, qp_ctx->res); - if (ctx->alg_type == SEC_AEAD) + if (ctx->alg_type == SEC_AEAD) { sec_free_mac_resource(dev, qp_ctx->res); + sec_free_aiv_resource(dev, qp_ctx->res); + } } -static int sec_alloc_qp_ctx_resource(struct hisi_qm *qm, struct sec_ctx *ctx, - struct sec_qp_ctx *qp_ctx) +static int sec_alloc_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ctx) { u16 q_depth = qp_ctx->qp->sq_depth; struct device *dev = ctx->dev; @@ -541,8 +542,7 @@ static void sec_free_qp_ctx_resource(struct sec_ctx *ctx, struct sec_qp_ctx *qp_ kfree(qp_ctx->req_list); } -static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, - int qp_ctx_id, int alg_type) +static int sec_create_qp_ctx(struct sec_ctx *ctx, int qp_ctx_id) { struct sec_qp_ctx *qp_ctx; struct hisi_qp *qp; @@ -561,7 +561,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, idr_init(&qp_ctx->req_idr); INIT_LIST_HEAD(&qp_ctx->backlog); - ret = sec_alloc_qp_ctx_resource(qm, ctx, qp_ctx); + ret = sec_alloc_qp_ctx_resource(ctx, qp_ctx); if (ret) goto err_destroy_idr; @@ -614,7 +614,7 @@ static int sec_ctx_base_init(struct sec_ctx *ctx) } for (i = 0; i < sec->ctx_q_num; i++) { - ret = sec_create_qp_ctx(&sec->qm, ctx, i, 0); + ret = sec_create_qp_ctx(ctx, i); if (ret) goto err_sec_release_qp_ctx; } @@ -750,9 +750,7 @@ static void sec_skcipher_uninit(struct crypto_skcipher *tfm) sec_ctx_base_uninit(ctx); } -static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key, - const u32 keylen, - const enum sec_cmode c_mode) +static int sec_skcipher_3des_setkey(struct crypto_skcipher *tfm, const u8 *key, const u32 keylen) { struct sec_ctx *ctx = crypto_skcipher_ctx(tfm); struct sec_cipher_ctx *c_ctx = &ctx->c_ctx; @@ -843,7 +841,7 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, switch (c_alg) { case SEC_CALG_3DES: - ret = sec_skcipher_3des_setkey(tfm, key, keylen, c_mode); + ret = 
sec_skcipher_3des_setkey(tfm, key, keylen); break; case SEC_CALG_AES: case SEC_CALG_SM4: @@ -1371,7 +1369,7 @@ static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req) sec_sqe3->bd_param = cpu_to_le32(bd_param); sec_sqe3->c_len_ivin |= cpu_to_le32(c_req->c_len); - sec_sqe3->tag = cpu_to_le64(req); + sec_sqe3->tag = cpu_to_le64((unsigned long)req); return 0; } @@ -2145,8 +2143,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) return sec_skcipher_crypto(sk_req, false); } -#define SEC_SKCIPHER_GEN_ALG(sec_cra_name, sec_set_key, sec_min_key_size, \ - sec_max_key_size, ctx_init, ctx_exit, blk_size, iv_size)\ +#define SEC_SKCIPHER_ALG(sec_cra_name, sec_set_key, \ + sec_min_key_size, sec_max_key_size, blk_size, iv_size)\ {\ .base = {\ .cra_name = sec_cra_name,\ @@ -2158,8 +2156,8 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) .cra_ctxsize = sizeof(struct sec_ctx),\ .cra_module = THIS_MODULE,\ },\ - .init = ctx_init,\ - .exit = ctx_exit,\ + .init = sec_skcipher_ctx_init,\ + .exit = sec_skcipher_ctx_exit,\ .setkey = sec_set_key,\ .decrypt = sec_skcipher_decrypt,\ .encrypt = sec_skcipher_encrypt,\ @@ -2168,11 +2166,6 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req) .ivsize = iv_size,\ } -#define SEC_SKCIPHER_ALG(name, key_func, min_key_size, \ - max_key_size, blk_size, iv_size) \ - SEC_SKCIPHER_GEN_ALG(name, key_func, min_key_size, max_key_size, \ - sec_skcipher_ctx_init, sec_skcipher_ctx_exit, blk_size, iv_size) - static struct sec_skcipher sec_skciphers[] = { { .alg_msk = BIT(0), diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 7bb99381bb..75aad04ffe 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -99,8 +99,8 @@ #define SEC_DBGFS_VAL_MAX_LEN 20 #define SEC_SINGLE_PORT_MAX_TRANS 0x2060 -#define SEC_SQE_MASK_OFFSET 64 -#define SEC_SQE_MASK_LEN 48 +#define SEC_SQE_MASK_OFFSET 16 +#define 
SEC_SQE_MASK_LEN 108 #define SEC_SHAPER_TYPE_RATE 400 #define SEC_DFX_BASE 0x301000 @@ -152,7 +152,7 @@ static const struct hisi_qm_cap_info sec_basic_info[] = { {SEC_CORE_TYPE_NUM_CAP, 0x313c, 16, GENMASK(3, 0), 0x1, 0x1, 0x1}, {SEC_CORE_NUM_CAP, 0x313c, 8, GENMASK(7, 0), 0x4, 0x4, 0x4}, {SEC_CORES_PER_CLUSTER_NUM_CAP, 0x313c, 0, GENMASK(7, 0), 0x4, 0x4, 0x4}, - {SEC_CORE_ENABLE_BITMAP, 0x3140, 32, GENMASK(31, 0), 0x17F, 0x17F, 0xF}, + {SEC_CORE_ENABLE_BITMAP, 0x3140, 0, GENMASK(31, 0), 0x17F, 0x17F, 0xF}, {SEC_DRV_ALG_BITMAP_LOW, 0x3144, 0, GENMASK(31, 0), 0x18050CB, 0x18050CB, 0x18670CF}, {SEC_DRV_ALG_BITMAP_HIGH, 0x3148, 0, GENMASK(31, 0), 0x395C, 0x395C, 0x395C}, {SEC_DEV_ALG_BITMAP_LOW, 0x314c, 0, GENMASK(31, 0), 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF}, @@ -282,6 +282,11 @@ static const struct debugfs_reg32 sec_dfx_regs[] = { {"SEC_BD_SAA6 ", 0x301C38}, {"SEC_BD_SAA7 ", 0x301C3C}, {"SEC_BD_SAA8 ", 0x301C40}, + {"SEC_RAS_CE_ENABLE ", 0x301050}, + {"SEC_RAS_FE_ENABLE ", 0x301054}, + {"SEC_RAS_NFE_ENABLE ", 0x301058}, + {"SEC_REQ_TRNG_TIME_TH ", 0x30112C}, + {"SEC_CHANNEL_RNG_REQ_THLD ", 0x302110}, }; /* define the SEC's dfx regs region and region length */ @@ -374,7 +379,7 @@ void sec_destroy_qps(struct hisi_qp **qps, int qp_num) struct hisi_qp **sec_create_qps(void) { - int node = cpu_to_node(smp_processor_id()); + int node = cpu_to_node(raw_smp_processor_id()); u32 ctx_num = ctx_q_num; struct hisi_qp **qps; int ret; @@ -896,37 +901,36 @@ static int sec_debugfs_init(struct hisi_qm *qm) struct device *dev = &qm->pdev->dev; int ret; - qm->debug.debug_root = debugfs_create_dir(dev_name(dev), - sec_debugfs_root); - qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; - ret = hisi_qm_regs_debugfs_init(qm, sec_diff_regs, ARRAY_SIZE(sec_diff_regs)); if (ret) { dev_warn(dev, "Failed to init SEC diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + sec_debugfs_root); + 
qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; + hisi_qm_debug_init(qm); ret = sec_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); debugfs_remove: - debugfs_remove_recursive(sec_debugfs_root); + debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); return ret; } static void sec_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); } static int sec_show_last_regs_init(struct hisi_qm *qm) @@ -1319,7 +1323,8 @@ static struct pci_driver sec_pci_driver = { .probe = sec_probe, .remove = sec_remove, .err_handler = &sec_err_handler, - .sriov_configure = hisi_qm_sriov_configure, + .sriov_configure = IS_ENABLED(CONFIG_PCI_IOV) ? 
+ hisi_qm_sriov_configure : NULL, .shutdown = hisi_qm_dev_shutdown, .driver.pm = &sec_pm_ops, }; diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c index 0beca257c2..568acd0aee 100644 --- a/drivers/crypto/hisilicon/sgl.c +++ b/drivers/crypto/hisilicon/sgl.c @@ -161,9 +161,6 @@ static struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool, struct mem_block *block; u32 block_index, offset; - if (!pool || !hw_sgl_dma || index >= pool->count) - return ERR_PTR(-EINVAL); - block = pool->mem_block; block_index = index / pool->sgl_num_per_block; offset = index % pool->sgl_num_per_block; @@ -230,7 +227,7 @@ hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev, struct scatterlist *sg; int sg_n; - if (!dev || !sgl || !pool || !hw_sgl_dma) + if (!dev || !sgl || !pool || !hw_sgl_dma || index >= pool->count) return ERR_PTR(-EINVAL); sg_n = sg_nents(sgl); diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c index c650c741a1..94e2d66b04 100644 --- a/drivers/crypto/hisilicon/zip/zip_crypto.c +++ b/drivers/crypto/hisilicon/zip/zip_crypto.c @@ -591,6 +591,7 @@ static struct acomp_alg hisi_zip_acomp_deflate = { .base = { .cra_name = "deflate", .cra_driver_name = "hisi-deflate-acomp", + .cra_flags = CRYPTO_ALG_ASYNC, .cra_module = THIS_MODULE, .cra_priority = HZIP_ALG_PRIORITY, .cra_ctxsize = sizeof(struct hisi_zip_ctx), diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index 479ba8a1d6..c94a7b20d0 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -454,7 +454,7 @@ MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids); int zip_create_qps(struct hisi_qp **qps, int qp_num, int node) { if (node == NUMA_NO_NODE) - node = cpu_to_node(smp_processor_id()); + node = cpu_to_node(raw_smp_processor_id()); return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps); } @@ -887,36 +887,34 @@ static int 
hisi_zip_ctrl_debug_init(struct hisi_qm *qm) static int hisi_zip_debugfs_init(struct hisi_qm *qm) { struct device *dev = &qm->pdev->dev; - struct dentry *dev_d; int ret; - dev_d = debugfs_create_dir(dev_name(dev), hzip_debugfs_root); - - qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; - qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; - qm->debug.debug_root = dev_d; ret = hisi_qm_regs_debugfs_init(qm, hzip_diff_regs, ARRAY_SIZE(hzip_diff_regs)); if (ret) { dev_warn(dev, "Failed to init ZIP diff regs!\n"); - goto debugfs_remove; + return ret; } + qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; + qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; + qm->debug.debug_root = debugfs_create_dir(dev_name(dev), + hzip_debugfs_root); + hisi_qm_debug_init(qm); if (qm->fun_type == QM_HW_PF) { ret = hisi_zip_ctrl_debug_init(qm); if (ret) - goto failed_to_create; + goto debugfs_remove; } hisi_zip_dfx_debug_init(qm); return 0; -failed_to_create: - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); debugfs_remove: - debugfs_remove_recursive(hzip_debugfs_root); + debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); return ret; } @@ -940,10 +938,10 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm) static void hisi_zip_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); - debugfs_remove_recursive(qm->debug.debug_root); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + if (qm->fun_type == QM_HW_PF) { hisi_zip_debug_regs_clear(qm); qm->debug.curr_qm_qp_num = 0; diff --git a/drivers/crypto/intel/iaa/iaa_crypto.h b/drivers/crypto/intel/iaa/iaa_crypto.h index 014420f7be..56985e3952 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto.h +++ b/drivers/crypto/intel/iaa/iaa_crypto.h @@ -49,20 +49,18 @@ struct iaa_wq { struct iaa_device *iaa_device; - u64 comp_calls; - u64 comp_bytes; - u64 decomp_calls; - u64 decomp_bytes; + atomic64_t comp_calls; + atomic64_t 
comp_bytes; + atomic64_t decomp_calls; + atomic64_t decomp_bytes; }; struct iaa_device_compression_mode { const char *name; struct aecs_comp_table_record *aecs_comp_table; - struct aecs_decomp_table_record *aecs_decomp_table; dma_addr_t aecs_comp_table_dma_addr; - dma_addr_t aecs_decomp_table_dma_addr; }; /* Representation of IAA device with wqs, populated by probe */ @@ -75,10 +73,10 @@ struct iaa_device { int n_wq; struct list_head wqs; - u64 comp_calls; - u64 comp_bytes; - u64 decomp_calls; - u64 decomp_bytes; + atomic64_t comp_calls; + atomic64_t comp_bytes; + atomic64_t decomp_calls; + atomic64_t decomp_bytes; }; struct wq_table_entry { @@ -107,23 +105,6 @@ struct aecs_comp_table_record { u32 reserved_padding[2]; } __packed; -/* AECS for decompress */ -struct aecs_decomp_table_record { - u32 crc; - u32 xor_checksum; - u32 low_filter_param; - u32 high_filter_param; - u32 output_mod_idx; - u32 drop_init_decomp_out_bytes; - u32 reserved[36]; - u32 output_accum_data[2]; - u32 out_bits_valid; - u32 bit_off_indexing; - u32 input_accum_data[64]; - u8 size_qw[32]; - u32 decomp_state[1220]; -} __packed; - int iaa_aecs_init_fixed(void); void iaa_aecs_cleanup_fixed(void); @@ -136,9 +117,6 @@ struct iaa_compression_mode { int ll_table_size; u32 *d_table; int d_table_size; - u32 *header_table; - int header_table_size; - u16 gen_decomp_table_flags; iaa_dev_comp_init_fn_t init; iaa_dev_comp_free_fn_t free; }; @@ -148,9 +126,6 @@ int add_iaa_compression_mode(const char *name, int ll_table_size, const u32 *d_table, int d_table_size, - const u8 *header_table, - int header_table_size, - u16 gen_decomp_table_flags, iaa_dev_comp_init_fn_t init, iaa_dev_comp_free_fn_t free); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c index 45cf5d74f0..19d9a333ac 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_comp_fixed.c @@ -78,7 +78,6 @@ int iaa_aecs_init_fixed(void) 
sizeof(fixed_ll_sym), fixed_d_sym, sizeof(fixed_d_sym), - NULL, 0, 0, init_fixed_mode, NULL); if (!ret) pr_debug("IAA fixed compression mode initialized\n"); diff --git a/drivers/crypto/intel/iaa/iaa_crypto_main.c b/drivers/crypto/intel/iaa/iaa_crypto_main.c index 64a2e87a55..e810d286ee 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_main.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_main.c @@ -258,16 +258,14 @@ static void free_iaa_compression_mode(struct iaa_compression_mode *mode) kfree(mode->name); kfree(mode->ll_table); kfree(mode->d_table); - kfree(mode->header_table); kfree(mode); } /* - * IAA Compression modes are defined by an ll_table, a d_table, and an - * optional header_table. These tables are typically generated and - * captured using statistics collected from running actual - * compress/decompress workloads. + * IAA Compression modes are defined by an ll_table and a d_table. + * These tables are typically generated and captured using statistics + * collected from running actual compress/decompress workloads. 
* * A module or other kernel code can add and remove compression modes * with a given name using the exported @add_iaa_compression_mode() @@ -315,9 +313,6 @@ EXPORT_SYMBOL_GPL(remove_iaa_compression_mode); * @ll_table_size: The ll table size in bytes * @d_table: The d table * @d_table_size: The d table size in bytes - * @header_table: Optional header table - * @header_table_size: Optional header table size in bytes - * @gen_decomp_table_flags: Otional flags used to generate the decomp table * @init: Optional callback function to init the compression mode data * @free: Optional callback function to free the compression mode data * @@ -330,9 +325,6 @@ int add_iaa_compression_mode(const char *name, int ll_table_size, const u32 *d_table, int d_table_size, - const u8 *header_table, - int header_table_size, - u16 gen_decomp_table_flags, iaa_dev_comp_init_fn_t init, iaa_dev_comp_free_fn_t free) { @@ -355,31 +347,19 @@ int add_iaa_compression_mode(const char *name, goto free; if (ll_table) { - mode->ll_table = kzalloc(ll_table_size, GFP_KERNEL); + mode->ll_table = kmemdup(ll_table, ll_table_size, GFP_KERNEL); if (!mode->ll_table) goto free; - memcpy(mode->ll_table, ll_table, ll_table_size); mode->ll_table_size = ll_table_size; } if (d_table) { - mode->d_table = kzalloc(d_table_size, GFP_KERNEL); + mode->d_table = kmemdup(d_table, d_table_size, GFP_KERNEL); if (!mode->d_table) goto free; - memcpy(mode->d_table, d_table, d_table_size); mode->d_table_size = d_table_size; } - if (header_table) { - mode->header_table = kzalloc(header_table_size, GFP_KERNEL); - if (!mode->header_table) - goto free; - memcpy(mode->header_table, header_table, header_table_size); - mode->header_table_size = header_table_size; - } - - mode->gen_decomp_table_flags = gen_decomp_table_flags; - mode->init = init; mode->free = free; @@ -420,10 +400,6 @@ static void free_device_compression_mode(struct iaa_device *iaa_device, if (device_mode->aecs_comp_table) dma_free_coherent(dev, size, 
device_mode->aecs_comp_table, device_mode->aecs_comp_table_dma_addr); - if (device_mode->aecs_decomp_table) - dma_free_coherent(dev, size, device_mode->aecs_decomp_table, - device_mode->aecs_decomp_table_dma_addr); - kfree(device_mode); } @@ -440,73 +416,6 @@ static int check_completion(struct device *dev, bool compress, bool only_once); -static int decompress_header(struct iaa_device_compression_mode *device_mode, - struct iaa_compression_mode *mode, - struct idxd_wq *wq) -{ - dma_addr_t src_addr, src2_addr; - struct idxd_desc *idxd_desc; - struct iax_hw_desc *desc; - struct device *dev; - int ret = 0; - - idxd_desc = idxd_alloc_desc(wq, IDXD_OP_BLOCK); - if (IS_ERR(idxd_desc)) - return PTR_ERR(idxd_desc); - - desc = idxd_desc->iax_hw; - - dev = &wq->idxd->pdev->dev; - - src_addr = dma_map_single(dev, (void *)mode->header_table, - mode->header_table_size, DMA_TO_DEVICE); - dev_dbg(dev, "%s: mode->name %s, src_addr %llx, dev %p, src %p, slen %d\n", - __func__, mode->name, src_addr, dev, - mode->header_table, mode->header_table_size); - if (unlikely(dma_mapping_error(dev, src_addr))) { - dev_dbg(dev, "dma_map_single err, exiting\n"); - ret = -ENOMEM; - return ret; - } - - desc->flags = IAX_AECS_GEN_FLAG; - desc->opcode = IAX_OPCODE_DECOMPRESS; - - desc->src1_addr = (u64)src_addr; - desc->src1_size = mode->header_table_size; - - src2_addr = device_mode->aecs_decomp_table_dma_addr; - desc->src2_addr = (u64)src2_addr; - desc->src2_size = 1088; - dev_dbg(dev, "%s: mode->name %s, src2_addr %llx, dev %p, src2_size %d\n", - __func__, mode->name, desc->src2_addr, dev, desc->src2_size); - desc->max_dst_size = 0; // suppressed output - - desc->decompr_flags = mode->gen_decomp_table_flags; - - desc->priv = 0; - - desc->completion_addr = idxd_desc->compl_dma; - - ret = idxd_submit_desc(wq, idxd_desc); - if (ret) { - pr_err("%s: submit_desc failed ret=0x%x\n", __func__, ret); - goto out; - } - - ret = check_completion(dev, idxd_desc->iax_completion, false, false); - if (ret) - 
dev_dbg(dev, "%s: mode->name %s check_completion failed ret=%d\n", - __func__, mode->name, ret); - else - dev_dbg(dev, "%s: mode->name %s succeeded\n", __func__, - mode->name); -out: - dma_unmap_single(dev, src_addr, 1088, DMA_TO_DEVICE); - - return ret; -} - static int init_device_compression_mode(struct iaa_device *iaa_device, struct iaa_compression_mode *mode, int idx, struct idxd_wq *wq) @@ -529,24 +438,11 @@ static int init_device_compression_mode(struct iaa_device *iaa_device, if (!device_mode->aecs_comp_table) goto free; - device_mode->aecs_decomp_table = dma_alloc_coherent(dev, size, - &device_mode->aecs_decomp_table_dma_addr, GFP_KERNEL); - if (!device_mode->aecs_decomp_table) - goto free; - /* Add Huffman table to aecs */ memset(device_mode->aecs_comp_table, 0, sizeof(*device_mode->aecs_comp_table)); memcpy(device_mode->aecs_comp_table->ll_sym, mode->ll_table, mode->ll_table_size); memcpy(device_mode->aecs_comp_table->d_sym, mode->d_table, mode->d_table_size); - if (mode->header_table) { - ret = decompress_header(device_mode, mode, wq); - if (ret) { - pr_debug("iaa header decompression failed: ret=%d\n", ret); - goto free; - } - } - if (mode->init) { ret = mode->init(device_mode); if (ret) @@ -1024,7 +920,7 @@ static void rebalance_wq_table(void) for_each_node_with_cpus(node) { node_cpus = cpumask_of_node(node); - for (cpu = 0; cpu < nr_cpus_per_node; cpu++) { + for (cpu = 0; cpu < cpumask_weight(node_cpus); cpu++) { int node_cpu = cpumask_nth(cpu, node_cpus); if (WARN_ON(node_cpu >= nr_cpu_ids)) { @@ -1181,8 +1077,8 @@ static void iaa_desc_complete(struct idxd_desc *idxd_desc, update_total_comp_bytes_out(ctx->req->dlen); update_wq_comp_bytes(iaa_wq->wq, ctx->req->dlen); } else { - update_total_decomp_bytes_in(ctx->req->dlen); - update_wq_decomp_bytes(iaa_wq->wq, ctx->req->dlen); + update_total_decomp_bytes_in(ctx->req->slen); + update_wq_decomp_bytes(iaa_wq->wq, ctx->req->slen); } if (ctx->compress && compression_ctx->verify_compress) { @@ -1920,6 
+1816,7 @@ static struct acomp_alg iaa_acomp_fixed_deflate = { .base = { .cra_name = "deflate", .cra_driver_name = "deflate-iaa", + .cra_flags = CRYPTO_ALG_ASYNC, .cra_ctxsize = sizeof(struct iaa_compression_ctx), .cra_module = THIS_MODULE, .cra_priority = IAA_ALG_PRIORITY, @@ -2106,7 +2003,7 @@ static int __init iaa_crypto_init_module(void) int ret = 0; int node; - nr_cpus = num_online_cpus(); + nr_cpus = num_possible_cpus(); for_each_node_with_cpus(node) nr_nodes++; if (!nr_nodes) { diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.c b/drivers/crypto/intel/iaa/iaa_crypto_stats.c index 2e3b7b73af..f5cc3d29ca 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.c +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.c @@ -17,165 +17,117 @@ #include "iaa_crypto.h" #include "iaa_crypto_stats.h" -static u64 total_comp_calls; -static u64 total_decomp_calls; -static u64 total_sw_decomp_calls; -static u64 max_comp_delay_ns; -static u64 max_decomp_delay_ns; -static u64 max_acomp_delay_ns; -static u64 max_adecomp_delay_ns; -static u64 total_comp_bytes_out; -static u64 total_decomp_bytes_in; -static u64 total_completion_einval_errors; -static u64 total_completion_timeout_errors; -static u64 total_completion_comp_buf_overflow_errors; +static atomic64_t total_comp_calls; +static atomic64_t total_decomp_calls; +static atomic64_t total_sw_decomp_calls; +static atomic64_t total_comp_bytes_out; +static atomic64_t total_decomp_bytes_in; +static atomic64_t total_completion_einval_errors; +static atomic64_t total_completion_timeout_errors; +static atomic64_t total_completion_comp_buf_overflow_errors; static struct dentry *iaa_crypto_debugfs_root; void update_total_comp_calls(void) { - total_comp_calls++; + atomic64_inc(&total_comp_calls); } void update_total_comp_bytes_out(int n) { - total_comp_bytes_out += n; + atomic64_add(n, &total_comp_bytes_out); } void update_total_decomp_calls(void) { - total_decomp_calls++; + atomic64_inc(&total_decomp_calls); } void 
update_total_sw_decomp_calls(void) { - total_sw_decomp_calls++; + atomic64_inc(&total_sw_decomp_calls); } void update_total_decomp_bytes_in(int n) { - total_decomp_bytes_in += n; + atomic64_add(n, &total_decomp_bytes_in); } void update_completion_einval_errs(void) { - total_completion_einval_errors++; + atomic64_inc(&total_completion_einval_errors); } void update_completion_timeout_errs(void) { - total_completion_timeout_errors++; + atomic64_inc(&total_completion_timeout_errors); } void update_completion_comp_buf_overflow_errs(void) { - total_completion_comp_buf_overflow_errors++; -} - -void update_max_comp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_comp_delay_ns) - max_comp_delay_ns = time_diff; -} - -void update_max_decomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_decomp_delay_ns) - max_decomp_delay_ns = time_diff; -} - -void update_max_acomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_acomp_delay_ns) - max_acomp_delay_ns = time_diff; -} - -void update_max_adecomp_delay_ns(u64 start_time_ns) -{ - u64 time_diff; - - time_diff = ktime_get_ns() - start_time_ns; - - if (time_diff > max_adecomp_delay_ns) - max_adecomp_delay_ns = time_diff; + atomic64_inc(&total_completion_comp_buf_overflow_errors); } void update_wq_comp_calls(struct idxd_wq *idxd_wq) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->comp_calls++; - wq->iaa_device->comp_calls++; + atomic64_inc(&wq->comp_calls); + atomic64_inc(&wq->iaa_device->comp_calls); } void update_wq_comp_bytes(struct idxd_wq *idxd_wq, int n) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->comp_bytes += n; - wq->iaa_device->comp_bytes += n; + atomic64_add(n, &wq->comp_bytes); + atomic64_add(n, &wq->iaa_device->comp_bytes); } void update_wq_decomp_calls(struct idxd_wq *idxd_wq) { struct 
iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->decomp_calls++; - wq->iaa_device->decomp_calls++; + atomic64_inc(&wq->decomp_calls); + atomic64_inc(&wq->iaa_device->decomp_calls); } void update_wq_decomp_bytes(struct idxd_wq *idxd_wq, int n) { struct iaa_wq *wq = idxd_wq_get_private(idxd_wq); - wq->decomp_bytes += n; - wq->iaa_device->decomp_bytes += n; + atomic64_add(n, &wq->decomp_bytes); + atomic64_add(n, &wq->iaa_device->decomp_bytes); } static void reset_iaa_crypto_stats(void) { - total_comp_calls = 0; - total_decomp_calls = 0; - total_sw_decomp_calls = 0; - max_comp_delay_ns = 0; - max_decomp_delay_ns = 0; - max_acomp_delay_ns = 0; - max_adecomp_delay_ns = 0; - total_comp_bytes_out = 0; - total_decomp_bytes_in = 0; - total_completion_einval_errors = 0; - total_completion_timeout_errors = 0; - total_completion_comp_buf_overflow_errors = 0; + atomic64_set(&total_comp_calls, 0); + atomic64_set(&total_decomp_calls, 0); + atomic64_set(&total_sw_decomp_calls, 0); + atomic64_set(&total_comp_bytes_out, 0); + atomic64_set(&total_decomp_bytes_in, 0); + atomic64_set(&total_completion_einval_errors, 0); + atomic64_set(&total_completion_timeout_errors, 0); + atomic64_set(&total_completion_comp_buf_overflow_errors, 0); } static void reset_wq_stats(struct iaa_wq *wq) { - wq->comp_calls = 0; - wq->comp_bytes = 0; - wq->decomp_calls = 0; - wq->decomp_bytes = 0; + atomic64_set(&wq->comp_calls, 0); + atomic64_set(&wq->comp_bytes, 0); + atomic64_set(&wq->decomp_calls, 0); + atomic64_set(&wq->decomp_bytes, 0); } static void reset_device_stats(struct iaa_device *iaa_device) { struct iaa_wq *iaa_wq; - iaa_device->comp_calls = 0; - iaa_device->comp_bytes = 0; - iaa_device->decomp_calls = 0; - iaa_device->decomp_bytes = 0; + atomic64_set(&iaa_device->comp_calls, 0); + atomic64_set(&iaa_device->comp_bytes, 0); + atomic64_set(&iaa_device->decomp_calls, 0); + atomic64_set(&iaa_device->decomp_bytes, 0); list_for_each_entry(iaa_wq, &iaa_device->wqs, list) reset_wq_stats(iaa_wq); @@ 
-184,10 +136,14 @@ static void reset_device_stats(struct iaa_device *iaa_device) static void wq_show(struct seq_file *m, struct iaa_wq *iaa_wq) { seq_printf(m, " name: %s\n", iaa_wq->wq->name); - seq_printf(m, " comp_calls: %llu\n", iaa_wq->comp_calls); - seq_printf(m, " comp_bytes: %llu\n", iaa_wq->comp_bytes); - seq_printf(m, " decomp_calls: %llu\n", iaa_wq->decomp_calls); - seq_printf(m, " decomp_bytes: %llu\n\n", iaa_wq->decomp_bytes); + seq_printf(m, " comp_calls: %llu\n", + atomic64_read(&iaa_wq->comp_calls)); + seq_printf(m, " comp_bytes: %llu\n", + atomic64_read(&iaa_wq->comp_bytes)); + seq_printf(m, " decomp_calls: %llu\n", + atomic64_read(&iaa_wq->decomp_calls)); + seq_printf(m, " decomp_bytes: %llu\n\n", + atomic64_read(&iaa_wq->decomp_bytes)); } static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) @@ -197,30 +153,41 @@ static void device_stats_show(struct seq_file *m, struct iaa_device *iaa_device) seq_puts(m, "iaa device:\n"); seq_printf(m, " id: %d\n", iaa_device->idxd->id); seq_printf(m, " n_wqs: %d\n", iaa_device->n_wq); - seq_printf(m, " comp_calls: %llu\n", iaa_device->comp_calls); - seq_printf(m, " comp_bytes: %llu\n", iaa_device->comp_bytes); - seq_printf(m, " decomp_calls: %llu\n", iaa_device->decomp_calls); - seq_printf(m, " decomp_bytes: %llu\n", iaa_device->decomp_bytes); + seq_printf(m, " comp_calls: %llu\n", + atomic64_read(&iaa_device->comp_calls)); + seq_printf(m, " comp_bytes: %llu\n", + atomic64_read(&iaa_device->comp_bytes)); + seq_printf(m, " decomp_calls: %llu\n", + atomic64_read(&iaa_device->decomp_calls)); + seq_printf(m, " decomp_bytes: %llu\n", + atomic64_read(&iaa_device->decomp_bytes)); seq_puts(m, " wqs:\n"); list_for_each_entry(iaa_wq, &iaa_device->wqs, list) wq_show(m, iaa_wq); } -static void global_stats_show(struct seq_file *m) +static int global_stats_show(struct seq_file *m, void *v) { seq_puts(m, "global stats:\n"); - seq_printf(m, " total_comp_calls: %llu\n", total_comp_calls); - 
seq_printf(m, " total_decomp_calls: %llu\n", total_decomp_calls); - seq_printf(m, " total_sw_decomp_calls: %llu\n", total_sw_decomp_calls); - seq_printf(m, " total_comp_bytes_out: %llu\n", total_comp_bytes_out); - seq_printf(m, " total_decomp_bytes_in: %llu\n", total_decomp_bytes_in); + seq_printf(m, " total_comp_calls: %llu\n", + atomic64_read(&total_comp_calls)); + seq_printf(m, " total_decomp_calls: %llu\n", + atomic64_read(&total_decomp_calls)); + seq_printf(m, " total_sw_decomp_calls: %llu\n", + atomic64_read(&total_sw_decomp_calls)); + seq_printf(m, " total_comp_bytes_out: %llu\n", + atomic64_read(&total_comp_bytes_out)); + seq_printf(m, " total_decomp_bytes_in: %llu\n", + atomic64_read(&total_decomp_bytes_in)); seq_printf(m, " total_completion_einval_errors: %llu\n", - total_completion_einval_errors); + atomic64_read(&total_completion_einval_errors)); seq_printf(m, " total_completion_timeout_errors: %llu\n", - total_completion_timeout_errors); + atomic64_read(&total_completion_timeout_errors)); seq_printf(m, " total_completion_comp_buf_overflow_errors: %llu\n\n", - total_completion_comp_buf_overflow_errors); + atomic64_read(&total_completion_comp_buf_overflow_errors)); + + return 0; } static int wq_stats_show(struct seq_file *m, void *v) @@ -229,8 +196,6 @@ static int wq_stats_show(struct seq_file *m, void *v) mutex_lock(&iaa_devices_lock); - global_stats_show(m); - list_for_each_entry(iaa_device, &iaa_devices, list) device_stats_show(m, iaa_device); @@ -267,6 +232,18 @@ static const struct file_operations wq_stats_fops = { .release = single_release, }; +static int global_stats_open(struct inode *inode, struct file *file) +{ + return single_open(file, global_stats_show, file); +} + +static const struct file_operations global_stats_fops = { + .open = global_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + DEFINE_DEBUGFS_ATTRIBUTE(wq_stats_reset_fops, NULL, iaa_crypto_stats_reset, "%llu\n"); int __init 
iaa_crypto_debugfs_init(void) @@ -275,27 +252,9 @@ int __init iaa_crypto_debugfs_init(void) return -ENODEV; iaa_crypto_debugfs_root = debugfs_create_dir("iaa_crypto", NULL); - if (!iaa_crypto_debugfs_root) - return -ENOMEM; - - debugfs_create_u64("max_comp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_comp_delay_ns); - debugfs_create_u64("max_decomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_decomp_delay_ns); - debugfs_create_u64("max_acomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_comp_delay_ns); - debugfs_create_u64("max_adecomp_delay_ns", 0644, - iaa_crypto_debugfs_root, &max_decomp_delay_ns); - debugfs_create_u64("total_comp_calls", 0644, - iaa_crypto_debugfs_root, &total_comp_calls); - debugfs_create_u64("total_decomp_calls", 0644, - iaa_crypto_debugfs_root, &total_decomp_calls); - debugfs_create_u64("total_sw_decomp_calls", 0644, - iaa_crypto_debugfs_root, &total_sw_decomp_calls); - debugfs_create_u64("total_comp_bytes_out", 0644, - iaa_crypto_debugfs_root, &total_comp_bytes_out); - debugfs_create_u64("total_decomp_bytes_in", 0644, - iaa_crypto_debugfs_root, &total_decomp_bytes_in); + + debugfs_create_file("global_stats", 0644, iaa_crypto_debugfs_root, NULL, + &global_stats_fops); debugfs_create_file("wq_stats", 0644, iaa_crypto_debugfs_root, NULL, &wq_stats_fops); debugfs_create_file("stats_reset", 0644, iaa_crypto_debugfs_root, NULL, diff --git a/drivers/crypto/intel/iaa/iaa_crypto_stats.h b/drivers/crypto/intel/iaa/iaa_crypto_stats.h index c10b87b86f..3787a5f507 100644 --- a/drivers/crypto/intel/iaa/iaa_crypto_stats.h +++ b/drivers/crypto/intel/iaa/iaa_crypto_stats.h @@ -13,10 +13,6 @@ void update_total_comp_bytes_out(int n); void update_total_decomp_calls(void); void update_total_sw_decomp_calls(void); void update_total_decomp_bytes_in(int n); -void update_max_comp_delay_ns(u64 start_time_ns); -void update_max_decomp_delay_ns(u64 start_time_ns); -void update_max_acomp_delay_ns(u64 start_time_ns); -void update_max_adecomp_delay_ns(u64 
start_time_ns); void update_completion_einval_errs(void); void update_completion_timeout_errs(void); void update_completion_comp_buf_overflow_errs(void); @@ -35,10 +31,6 @@ static inline void update_total_comp_bytes_out(int n) {} static inline void update_total_decomp_calls(void) {} static inline void update_total_sw_decomp_calls(void) {} static inline void update_total_decomp_bytes_in(int n) {} -static inline void update_max_comp_delay_ns(u64 start_time_ns) {} -static inline void update_max_decomp_delay_ns(u64 start_time_ns) {} -static inline void update_max_acomp_delay_ns(u64 start_time_ns) {} -static inline void update_max_adecomp_delay_ns(u64 start_time_ns) {} static inline void update_completion_einval_errs(void) {} static inline void update_completion_timeout_errs(void) {} static inline void update_completion_comp_buf_overflow_errs(void) {} diff --git a/drivers/crypto/intel/qat/Kconfig b/drivers/crypto/intel/qat/Kconfig index c120f6715a..02fb8abe4e 100644 --- a/drivers/crypto/intel/qat/Kconfig +++ b/drivers/crypto/intel/qat/Kconfig @@ -106,3 +106,17 @@ config CRYPTO_DEV_QAT_C62XVF To compile this as a module, choose M here: the module will be called qat_c62xvf. + +config CRYPTO_DEV_QAT_ERROR_INJECTION + bool "Support for Intel(R) QAT Devices Heartbeat Error Injection" + depends on CRYPTO_DEV_QAT + depends on DEBUG_FS + help + Enables a mechanism that allows to inject a heartbeat error on + Intel(R) QuickAssist devices for testing purposes. + + This is intended for developer use only. + If unsure, say N. 
+ + This functionality is available via debugfs entry of the Intel(R) + QuickAssist device diff --git a/drivers/crypto/intel/qat/qat_420xx/Makefile b/drivers/crypto/intel/qat/qat_420xx/Makefile index a90fbe00b3..45728659fb 100644 --- a/drivers/crypto/intel/qat/qat_420xx/Makefile +++ b/drivers/crypto/intel/qat/qat_420xx/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/../qat_common +ccflags-y := -I $(src)/../qat_common obj-$(CONFIG_CRYPTO_DEV_QAT_420XX) += qat_420xx.o qat_420xx-objs := adf_drv.o adf_420xx_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c index 7b8abfb797..78f0ea4925 100644 --- a/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_420xx/adf_420xx_hw_data.c @@ -10,12 +10,14 @@ #include <adf_fw_config.h> #include <adf_gen4_config.h> #include <adf_gen4_dc.h> +#include <adf_gen4_hw_csr_data.h> #include <adf_gen4_hw_data.h> #include <adf_gen4_pfvf.h> #include <adf_gen4_pm.h> #include <adf_gen4_ras.h> #include <adf_gen4_timer.h> #include <adf_gen4_tl.h> +#include <adf_gen4_vf_mig.h> #include "adf_420xx_hw_data.h" #include "icp_qat_hw.h" @@ -361,61 +363,6 @@ static u32 get_ena_thd_mask(struct adf_accel_dev *accel_dev, u32 obj_num) } } -static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev) -{ - enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { }; - const struct adf_fw_config *fw_config; - u16 ring_to_svc_map; - int i, j; - - fw_config = get_fw_config(accel_dev); - if (!fw_config) - return 0; - - /* If dcc, all rings handle compression requests */ - if (adf_get_service_enabled(accel_dev) == SVC_DCC) { - for (i = 0; i < RP_GROUP_COUNT; i++) - rps[i] = COMP; - goto set_mask; - } - - for (i = 0; i < RP_GROUP_COUNT; i++) { - switch (fw_config[i].ae_mask) { - case ADF_AE_GROUP_0: - j = RP_GROUP_0; - break; - case ADF_AE_GROUP_1: - j = RP_GROUP_1; - break; - default: - return 0; - } - 
- switch (fw_config[i].obj) { - case ADF_FW_SYM_OBJ: - rps[j] = SYM; - break; - case ADF_FW_ASYM_OBJ: - rps[j] = ASYM; - break; - case ADF_FW_DC_OBJ: - rps[j] = COMP; - break; - default: - rps[j] = 0; - break; - } - } - -set_mask: - ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT | - rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT | - rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT | - rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT; - - return ring_to_svc_map; -} - static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num, const char * const fw_objs[], int num_objs) { @@ -441,6 +388,20 @@ static const char *uof_get_name_420xx(struct adf_accel_dev *accel_dev, u32 obj_n return uof_get_name(accel_dev, obj_num, adf_420xx_fw_objs, num_fw_objs); } +static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num) +{ + const struct adf_fw_config *fw_config; + + if (obj_num >= uof_get_num_objs(accel_dev)) + return -EINVAL; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return -EINVAL; + + return fw_config[obj_num].obj; +} + static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num) { const struct adf_fw_config *fw_config; @@ -504,12 +465,13 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id) hw_data->fw_mmp_name = ADF_420XX_MMP; hw_data->uof_get_name = uof_get_name_420xx; hw_data->uof_get_num_objs = uof_get_num_objs; + hw_data->uof_get_obj_type = uof_get_obj_type; hw_data->uof_get_ae_mask = uof_get_ae_mask; hw_data->get_rp_group = get_rp_group; hw_data->get_ena_thd_mask = get_ena_thd_mask; hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; - hw_data->get_ring_to_svc_map = get_ring_to_svc_map; + hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map; hw_data->disable_iov = adf_disable_sriov; hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; hw_data->enable_pm = adf_gen4_enable_pm; @@ -527,6 
+489,7 @@ void adf_init_hw_data_420xx(struct adf_hw_device_data *hw_data, u32 dev_id) adf_gen4_init_dc_ops(&hw_data->dc_ops); adf_gen4_init_ras_ops(&hw_data->ras_ops); adf_gen4_init_tl_data(&hw_data->tl_data); + adf_gen4_init_vf_mig_ops(&hw_data->vfmig_ops); adf_init_rl_data(&hw_data->rl_data); } diff --git a/drivers/crypto/intel/qat/qat_4xxx/Makefile b/drivers/crypto/intel/qat/qat_4xxx/Makefile index ff9c8b5897..9ba202079a 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/Makefile +++ b/drivers/crypto/intel/qat/qat_4xxx/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) -ccflags-y := -I $(srctree)/$(src)/../qat_common +ccflags-y := -I $(src)/../qat_common obj-$(CONFIG_CRYPTO_DEV_QAT_4XXX) += qat_4xxx.o qat_4xxx-objs := adf_drv.o adf_4xxx_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c index 7a5c5f9711..9fd7ec53b9 100644 --- a/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -10,12 +10,14 @@ #include <adf_fw_config.h> #include <adf_gen4_config.h> #include <adf_gen4_dc.h> +#include <adf_gen4_hw_csr_data.h> #include <adf_gen4_hw_data.h> #include <adf_gen4_pfvf.h> #include <adf_gen4_pm.h> #include "adf_gen4_ras.h" #include <adf_gen4_timer.h> #include <adf_gen4_tl.h> +#include <adf_gen4_vf_mig.h> #include "adf_4xxx_hw_data.h" #include "icp_qat_hw.h" @@ -320,61 +322,6 @@ static u32 get_ena_thd_mask_401xx(struct adf_accel_dev *accel_dev, u32 obj_num) } } -static u16 get_ring_to_svc_map(struct adf_accel_dev *accel_dev) -{ - enum adf_cfg_service_type rps[RP_GROUP_COUNT]; - const struct adf_fw_config *fw_config; - u16 ring_to_svc_map; - int i, j; - - fw_config = get_fw_config(accel_dev); - if (!fw_config) - return 0; - - /* If dcc, all rings handle compression requests */ - if (adf_get_service_enabled(accel_dev) == SVC_DCC) { - for (i = 0; i < RP_GROUP_COUNT; i++) - rps[i] = COMP; - goto set_mask; - 
} - - for (i = 0; i < RP_GROUP_COUNT; i++) { - switch (fw_config[i].ae_mask) { - case ADF_AE_GROUP_0: - j = RP_GROUP_0; - break; - case ADF_AE_GROUP_1: - j = RP_GROUP_1; - break; - default: - return 0; - } - - switch (fw_config[i].obj) { - case ADF_FW_SYM_OBJ: - rps[j] = SYM; - break; - case ADF_FW_ASYM_OBJ: - rps[j] = ASYM; - break; - case ADF_FW_DC_OBJ: - rps[j] = COMP; - break; - default: - rps[j] = 0; - break; - } - } - -set_mask: - ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT | - rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT | - rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT | - rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT; - - return ring_to_svc_map; -} - static const char *uof_get_name(struct adf_accel_dev *accel_dev, u32 obj_num, const char * const fw_objs[], int num_objs) { @@ -407,6 +354,20 @@ static const char *uof_get_name_402xx(struct adf_accel_dev *accel_dev, u32 obj_n return uof_get_name(accel_dev, obj_num, adf_402xx_fw_objs, num_fw_objs); } +static int uof_get_obj_type(struct adf_accel_dev *accel_dev, u32 obj_num) +{ + const struct adf_fw_config *fw_config; + + if (obj_num >= uof_get_num_objs(accel_dev)) + return -EINVAL; + + fw_config = get_fw_config(accel_dev); + if (!fw_config) + return -EINVAL; + + return fw_config[obj_num].obj; +} + static u32 uof_get_ae_mask(struct adf_accel_dev *accel_dev, u32 obj_num) { const struct adf_fw_config *fw_config; @@ -487,13 +448,16 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) break; } hw_data->uof_get_num_objs = uof_get_num_objs; + hw_data->uof_get_obj_type = uof_get_obj_type; hw_data->uof_get_ae_mask = uof_get_ae_mask; hw_data->get_rp_group = get_rp_group; hw_data->set_msix_rttable = adf_gen4_set_msix_default_rttable; hw_data->set_ssm_wdtimer = adf_gen4_set_ssm_wdtimer; - hw_data->get_ring_to_svc_map = get_ring_to_svc_map; + hw_data->get_ring_to_svc_map = adf_gen4_get_ring_to_svc_map; hw_data->disable_iov = adf_disable_sriov; 
hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; + hw_data->bank_state_save = adf_gen4_bank_state_save; + hw_data->bank_state_restore = adf_gen4_bank_state_restore; hw_data->enable_pm = adf_gen4_enable_pm; hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt; hw_data->dev_config = adf_gen4_dev_config; @@ -509,6 +473,7 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data, u32 dev_id) adf_gen4_init_dc_ops(&hw_data->dc_ops); adf_gen4_init_ras_ops(&hw_data->ras_ops); adf_gen4_init_tl_data(&hw_data->tl_data); + adf_gen4_init_vf_mig_ops(&hw_data->vfmig_ops); adf_init_rl_data(&hw_data->rl_data); } diff --git a/drivers/crypto/intel/qat/qat_c3xxx/Makefile b/drivers/crypto/intel/qat/qat_c3xxx/Makefile index 92ef416ccc..7a06ad519b 100644 --- a/drivers/crypto/intel/qat/qat_c3xxx/Makefile +++ b/drivers/crypto/intel/qat/qat_c3xxx/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/../qat_common +ccflags-y := -I $(src)/../qat_common obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXX) += qat_c3xxx.o qat_c3xxx-objs := adf_drv.o adf_c3xxx_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c index a882e0ea22..201f9412c5 100644 --- a/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -6,6 +6,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include "adf_c3xxx_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile b/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile index b6d76825a9..7ef633058c 100644 --- a/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile +++ b/drivers/crypto/intel/qat/qat_c3xxxvf/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/../qat_common +ccflags-y := -I $(src)/../qat_common 
obj-$(CONFIG_CRYPTO_DEV_QAT_C3XXXVF) += qat_c3xxxvf.o qat_c3xxxvf-objs := adf_drv.o adf_c3xxxvf_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index 84d9486e04..a512ca4efd 100644 --- a/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -4,6 +4,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include <adf_pfvf_vf_msg.h> diff --git a/drivers/crypto/intel/qat/qat_c62x/Makefile b/drivers/crypto/intel/qat/qat_c62x/Makefile index d581f7c87d..cc9255b3b1 100644 --- a/drivers/crypto/intel/qat/qat_c62x/Makefile +++ b/drivers/crypto/intel/qat/qat_c62x/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/../qat_common +ccflags-y := -I $(src)/../qat_common obj-$(CONFIG_CRYPTO_DEV_QAT_C62X) += qat_c62x.o qat_c62x-objs := adf_drv.o adf_c62x_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c index 48cf3eb7c7..6b5b0cf9c7 100644 --- a/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c62x/adf_c62x_hw_data.c @@ -6,6 +6,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include "adf_c62x_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_c62xvf/Makefile b/drivers/crypto/intel/qat/qat_c62xvf/Makefile index 446c3d6386..256786662d 100644 --- a/drivers/crypto/intel/qat/qat_c62xvf/Makefile +++ b/drivers/crypto/intel/qat/qat_c62xvf/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/../qat_common +ccflags-y := -I $(src)/../qat_common 
obj-$(CONFIG_CRYPTO_DEV_QAT_C62XVF) += qat_c62xvf.o qat_c62xvf-objs := adf_drv.o adf_c62xvf_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 751d7aa57f..4aaaaf9217 100644 --- a/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -4,6 +4,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include <adf_pfvf_vf_msg.h> diff --git a/drivers/crypto/intel/qat/qat_common/Makefile b/drivers/crypto/intel/qat/qat_common/Makefile index 6908727bff..eac73cbfdd 100644 --- a/drivers/crypto/intel/qat/qat_common/Makefile +++ b/drivers/crypto/intel/qat/qat_common/Makefile @@ -14,16 +14,20 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ adf_sysfs.o \ adf_sysfs_ras_counters.o \ + adf_gen2_hw_csr_data.o \ adf_gen2_hw_data.o \ adf_gen2_config.o \ adf_gen4_config.o \ + adf_gen4_hw_csr_data.o \ adf_gen4_hw_data.o \ + adf_gen4_vf_mig.o \ adf_gen4_pm.o \ adf_gen2_dc.o \ adf_gen4_dc.o \ adf_gen4_ras.o \ adf_gen4_timer.o \ adf_clock.o \ + adf_mstate_mgr.o \ qat_crypto.o \ qat_compression.o \ qat_comp_algs.o \ @@ -35,7 +39,8 @@ intel_qat-objs := adf_cfg.o \ adf_sysfs_rl.o \ qat_uclo.o \ qat_hal.o \ - qat_bl.o + qat_bl.o \ + qat_mig_dev.o intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o \ adf_fw_counters.o \ @@ -53,3 +58,5 @@ intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ adf_pfvf_pf_msg.o adf_pfvf_pf_proto.o \ adf_pfvf_vf_msg.o adf_pfvf_vf_proto.o \ adf_gen2_pfvf.o adf_gen4_pfvf.o + +intel_qat-$(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION) += adf_heartbeat_inject.o diff --git a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h index a16c7e6edc..7830ecb1a1 100644 --- 
a/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/intel/qat/qat_common/adf_accel_devices.h @@ -9,6 +9,7 @@ #include <linux/pci.h> #include <linux/ratelimit.h> #include <linux/types.h> +#include <linux/qat/qat_mig_dev.h> #include "adf_cfg_common.h" #include "adf_rl.h" #include "adf_telemetry.h" @@ -140,6 +141,40 @@ struct admin_info { u32 mailbox_offset; }; +struct ring_config { + u64 base; + u32 config; + u32 head; + u32 tail; + u32 reserved0; +}; + +struct bank_state { + u32 ringstat0; + u32 ringstat1; + u32 ringuostat; + u32 ringestat; + u32 ringnestat; + u32 ringnfstat; + u32 ringfstat; + u32 ringcstat0; + u32 ringcstat1; + u32 ringcstat2; + u32 ringcstat3; + u32 iaintflagen; + u32 iaintflagreg; + u32 iaintflagsrcsel0; + u32 iaintflagsrcsel1; + u32 iaintcolen; + u32 iaintcolctl; + u32 iaintflagandcolen; + u32 ringexpstat; + u32 ringexpintenable; + u32 ringsrvarben; + u32 reserved0; + struct ring_config rings[ADF_ETR_MAX_RINGS_PER_BANK]; +}; + struct adf_hw_csr_ops { u64 (*build_csr_ring_base_addr)(dma_addr_t addr, u32 size); u32 (*read_csr_ring_head)(void __iomem *csr_base_addr, u32 bank, @@ -150,22 +185,49 @@ struct adf_hw_csr_ops { u32 ring); void (*write_csr_ring_tail)(void __iomem *csr_base_addr, u32 bank, u32 ring, u32 value); + u32 (*read_csr_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_uo_stat)(void __iomem *csr_base_addr, u32 bank); u32 (*read_csr_e_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_ne_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_nf_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_f_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_c_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_exp_stat)(void __iomem *csr_base_addr, u32 bank); + u32 (*read_csr_exp_int_en)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_exp_int_en)(void __iomem *csr_base_addr, u32 bank, + u32 value); + u32 (*read_csr_ring_config)(void 
__iomem *csr_base_addr, u32 bank, + u32 ring); void (*write_csr_ring_config)(void __iomem *csr_base_addr, u32 bank, u32 ring, u32 value); + dma_addr_t (*read_csr_ring_base)(void __iomem *csr_base_addr, u32 bank, + u32 ring); void (*write_csr_ring_base)(void __iomem *csr_base_addr, u32 bank, u32 ring, dma_addr_t addr); + u32 (*read_csr_int_en)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_int_en)(void __iomem *csr_base_addr, u32 bank, + u32 value); + u32 (*read_csr_int_flag)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_flag)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_srcsel)(void __iomem *csr_base_addr, u32 bank); + void (*write_csr_int_srcsel_w_val)(void __iomem *csr_base_addr, + u32 bank, u32 value); + u32 (*read_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_col_en)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_int_col_ctl)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_int_flag_and_col)(void __iomem *csr_base_addr, + u32 bank); void (*write_csr_int_flag_and_col)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*read_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank); void (*write_csr_ring_srv_arb_en)(void __iomem *csr_base_addr, u32 bank, u32 value); + u32 (*get_int_col_ctl_enable_mask)(void); }; struct adf_cfg_device_data; @@ -197,6 +259,20 @@ struct adf_dc_ops { void (*build_deflate_ctx)(void *ctx); }; +struct qat_migdev_ops { + int (*init)(struct qat_mig_dev *mdev); + void (*cleanup)(struct qat_mig_dev *mdev); + void (*reset)(struct qat_mig_dev *mdev); + int (*open)(struct qat_mig_dev *mdev); + void (*close)(struct qat_mig_dev *mdev); + int (*suspend)(struct qat_mig_dev *mdev); + int (*resume)(struct qat_mig_dev *mdev); + int (*save_state)(struct qat_mig_dev *mdev); + int 
(*save_setup)(struct qat_mig_dev *mdev); + int (*load_state)(struct qat_mig_dev *mdev); + int (*load_setup)(struct qat_mig_dev *mdev, int size); +}; + struct adf_dev_err_mask { u32 cppagentcmdpar_mask; u32 parerr_ath_cph_mask; @@ -244,10 +320,15 @@ struct adf_hw_device_data { void (*enable_ints)(struct adf_accel_dev *accel_dev); void (*set_ssm_wdtimer)(struct adf_accel_dev *accel_dev); int (*ring_pair_reset)(struct adf_accel_dev *accel_dev, u32 bank_nr); + int (*bank_state_save)(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state); + int (*bank_state_restore)(struct adf_accel_dev *accel_dev, + u32 bank_number, struct bank_state *state); void (*reset_device)(struct adf_accel_dev *accel_dev); void (*set_msix_rttable)(struct adf_accel_dev *accel_dev); const char *(*uof_get_name)(struct adf_accel_dev *accel_dev, u32 obj_num); u32 (*uof_get_num_objs)(struct adf_accel_dev *accel_dev); + int (*uof_get_obj_type)(struct adf_accel_dev *accel_dev, u32 obj_num); u32 (*uof_get_ae_mask)(struct adf_accel_dev *accel_dev, u32 obj_num); int (*get_rp_group)(struct adf_accel_dev *accel_dev, u32 ae_mask); u32 (*get_ena_thd_mask)(struct adf_accel_dev *accel_dev, u32 obj_num); @@ -259,6 +340,7 @@ struct adf_hw_device_data { struct adf_dev_err_mask dev_err_mask; struct adf_rl_hw_data rl_data; struct adf_tl_hw_data tl_data; + struct qat_migdev_ops vfmig_ops; const char *fw_name; const char *fw_mmp_name; u32 fuses; @@ -315,6 +397,7 @@ struct adf_hw_device_data { #define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops) #define GET_PFVF_OPS(accel_dev) (&(accel_dev)->hw_device->pfvf_ops) #define GET_DC_OPS(accel_dev) (&(accel_dev)->hw_device->dc_ops) +#define GET_VFMIG_OPS(accel_dev) (&(accel_dev)->hw_device->vfmig_ops) #define GET_TL_DATA(accel_dev) GET_HW_DATA(accel_dev)->tl_data #define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev @@ -329,10 +412,17 @@ struct adf_fw_loader_data { struct adf_accel_vf_info { struct adf_accel_dev *accel_dev; 
struct mutex pf2vf_lock; /* protect CSR access for PF2VF messages */ + struct mutex pfvf_mig_lock; /* protects PFVF state for migration */ struct ratelimit_state vf2pf_ratelimit; u32 vf_nr; bool init; + bool restarting; u8 vf_compat_ver; + /* + * Private area used for device migration. + * Memory allocation and free is managed by migration driver. + */ + void *mig_priv; }; struct adf_dc_data { @@ -401,6 +491,7 @@ struct adf_accel_dev { struct adf_error_counters ras_errors; struct mutex state_lock; /* protect state of the device */ bool is_vf; + bool autoreset_on_error; u32 accel_id; }; #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_aer.c b/drivers/crypto/intel/qat/qat_common/adf_aer.c index 621d14ea3b..04260f61d0 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_aer.c +++ b/drivers/crypto/intel/qat/qat_common/adf_aer.c @@ -7,8 +7,15 @@ #include <linux/delay.h> #include "adf_accel_devices.h" #include "adf_common_drv.h" +#include "adf_pfvf_pf_msg.h" + +struct adf_fatal_error_data { + struct adf_accel_dev *accel_dev; + struct work_struct work; +}; static struct workqueue_struct *device_reset_wq; +static struct workqueue_struct *device_sriov_wq; static pci_ers_result_t adf_error_detected(struct pci_dev *pdev, pci_channel_state_t state) @@ -26,6 +33,19 @@ static pci_ers_result_t adf_error_detected(struct pci_dev *pdev, return PCI_ERS_RESULT_DISCONNECT; } + set_bit(ADF_STATUS_RESTARTING, &accel_dev->status); + if (accel_dev->hw_device->exit_arb) { + dev_dbg(&pdev->dev, "Disabling arbitration\n"); + accel_dev->hw_device->exit_arb(accel_dev); + } + adf_error_notifier(accel_dev); + adf_pf2vf_notify_fatal_error(accel_dev); + adf_dev_restarting_notify(accel_dev); + adf_pf2vf_notify_restarting(accel_dev); + adf_pf2vf_wait_for_restarting_complete(accel_dev); + pci_clear_master(pdev); + adf_dev_down(accel_dev, false); + return PCI_ERS_RESULT_NEED_RESET; } @@ -37,6 +57,13 @@ struct adf_reset_dev_data { struct work_struct reset_work; }; +/* sriov dev data */ 
+struct adf_sriov_dev_data { + struct adf_accel_dev *accel_dev; + struct completion compl; + struct work_struct sriov_work; +}; + void adf_reset_sbr(struct adf_accel_dev *accel_dev) { struct pci_dev *pdev = accel_to_pci_dev(accel_dev); @@ -82,35 +109,45 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) } } +static void adf_device_sriov_worker(struct work_struct *work) +{ + struct adf_sriov_dev_data *sriov_data = + container_of(work, struct adf_sriov_dev_data, sriov_work); + + adf_reenable_sriov(sriov_data->accel_dev); + complete(&sriov_data->compl); +} + static void adf_device_reset_worker(struct work_struct *work) { struct adf_reset_dev_data *reset_data = container_of(work, struct adf_reset_dev_data, reset_work); struct adf_accel_dev *accel_dev = reset_data->accel_dev; + unsigned long wait_jiffies = msecs_to_jiffies(10000); + struct adf_sriov_dev_data sriov_data; adf_dev_restarting_notify(accel_dev); if (adf_dev_restart(accel_dev)) { /* The device hanged and we can't restart it so stop here */ dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - if (reset_data->mode == ADF_DEV_RESET_ASYNC || - completion_done(&reset_data->compl)) + if (reset_data->mode == ADF_DEV_RESET_ASYNC) kfree(reset_data); WARN(1, "QAT: device restart failed. Device is unusable\n"); return; } + + sriov_data.accel_dev = accel_dev; + init_completion(&sriov_data.compl); + INIT_WORK(&sriov_data.sriov_work, adf_device_sriov_worker); + queue_work(device_sriov_wq, &sriov_data.sriov_work); + if (wait_for_completion_timeout(&sriov_data.compl, wait_jiffies)) + adf_pf2vf_notify_restarted(accel_dev); + adf_dev_restarted_notify(accel_dev); clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); - /* - * The dev is back alive. Notify the caller if in sync mode - * - * If device restart will take a more time than expected, - * the schedule_reset() function can timeout and exit. This can be - * detected by calling the completion_done() function. 
In this case - * the reset_data structure needs to be freed here. - */ - if (reset_data->mode == ADF_DEV_RESET_ASYNC || - completion_done(&reset_data->compl)) + /* The dev is back alive. Notify the caller if in sync mode */ + if (reset_data->mode == ADF_DEV_RESET_ASYNC) kfree(reset_data); else complete(&reset_data->compl); @@ -145,10 +182,10 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, if (!timeout) { dev_err(&GET_DEV(accel_dev), "Reset device timeout expired\n"); + cancel_work_sync(&reset_data->reset_work); ret = -EFAULT; - } else { - kfree(reset_data); } + kfree(reset_data); return ret; } return 0; @@ -157,14 +194,25 @@ static int adf_dev_aer_schedule_reset(struct adf_accel_dev *accel_dev, static pci_ers_result_t adf_slot_reset(struct pci_dev *pdev) { struct adf_accel_dev *accel_dev = adf_devmgr_pci_to_accel_dev(pdev); + int res = 0; if (!accel_dev) { pr_err("QAT: Can't find acceleration device\n"); return PCI_ERS_RESULT_DISCONNECT; } - if (adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_SYNC)) + + if (!pdev->is_busmaster) + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + res = adf_dev_up(accel_dev, false); + if (res && res != -EALREADY) return PCI_ERS_RESULT_DISCONNECT; + adf_reenable_sriov(accel_dev); + adf_pf2vf_notify_restarted(accel_dev); + adf_dev_restarted_notify(accel_dev); + clear_bit(ADF_STATUS_RESTARTING, &accel_dev->status); return PCI_ERS_RESULT_RECOVERED; } @@ -181,11 +229,62 @@ const struct pci_error_handlers adf_err_handler = { }; EXPORT_SYMBOL_GPL(adf_err_handler); +int adf_dev_autoreset(struct adf_accel_dev *accel_dev) +{ + if (accel_dev->autoreset_on_error) + return adf_dev_aer_schedule_reset(accel_dev, ADF_DEV_RESET_ASYNC); + + return 0; +} + +static void adf_notify_fatal_error_worker(struct work_struct *work) +{ + struct adf_fatal_error_data *wq_data = + container_of(work, struct adf_fatal_error_data, work); + struct adf_accel_dev *accel_dev = wq_data->accel_dev; + struct 
adf_hw_device_data *hw_device = accel_dev->hw_device; + + adf_error_notifier(accel_dev); + + if (!accel_dev->is_vf) { + /* Disable arbitration to stop processing of new requests */ + if (accel_dev->autoreset_on_error && hw_device->exit_arb) + hw_device->exit_arb(accel_dev); + if (accel_dev->pf.vf_info) + adf_pf2vf_notify_fatal_error(accel_dev); + adf_dev_autoreset(accel_dev); + } + + kfree(wq_data); +} + +int adf_notify_fatal_error(struct adf_accel_dev *accel_dev) +{ + struct adf_fatal_error_data *wq_data; + + wq_data = kzalloc(sizeof(*wq_data), GFP_ATOMIC); + if (!wq_data) + return -ENOMEM; + + wq_data->accel_dev = accel_dev; + INIT_WORK(&wq_data->work, adf_notify_fatal_error_worker); + adf_misc_wq_queue_work(&wq_data->work); + + return 0; +} + int adf_init_aer(void) { device_reset_wq = alloc_workqueue("qat_device_reset_wq", WQ_MEM_RECLAIM, 0); - return !device_reset_wq ? -EFAULT : 0; + if (!device_reset_wq) + return -EFAULT; + + device_sriov_wq = alloc_workqueue("qat_device_sriov_wq", 0, 0); + if (!device_sriov_wq) + return -EFAULT; + + return 0; } void adf_exit_aer(void) @@ -193,4 +292,8 @@ void adf_exit_aer(void) if (device_reset_wq) destroy_workqueue(device_reset_wq); device_reset_wq = NULL; + + if (device_sriov_wq) + destroy_workqueue(device_sriov_wq); + device_sriov_wq = NULL; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg.c b/drivers/crypto/intel/qat/qat_common/adf_cfg.c index 8836f015c3..2cf102ad4c 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg.c +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg.c @@ -290,17 +290,19 @@ int adf_cfg_add_key_value_param(struct adf_accel_dev *accel_dev, * 3. if the key exists with the same value, then return without doing * anything (the newly created key_val is freed). 
*/ + down_write(&cfg->lock); if (!adf_cfg_key_val_get(accel_dev, section_name, key, temp_val)) { if (strncmp(temp_val, key_val->val, sizeof(temp_val))) { adf_cfg_keyval_remove(key, section); } else { kfree(key_val); - return 0; + goto out; } } - down_write(&cfg->lock); adf_cfg_keyval_add(key_val, section); + +out: up_write(&cfg->lock); return 0; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h index 322b76903a..e015ad6cac 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/intel/qat/qat_common/adf_cfg_strings.h @@ -49,5 +49,6 @@ ADF_ETRMGR_BANK "%d" ADF_ETRMGR_CORE_AFFINITY #define ADF_ACCEL_STR "Accelerator%d" #define ADF_HEARTBEAT_TIMER "HeartbeatTimer" +#define ADF_SRIOV_ENABLED "SriovEnabled" #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h index f06188033a..3bec9e20ba 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/intel/qat/qat_common/adf_common_drv.h @@ -40,6 +40,7 @@ enum adf_event { ADF_EVENT_SHUTDOWN, ADF_EVENT_RESTARTING, ADF_EVENT_RESTARTED, + ADF_EVENT_FATAL_ERROR, }; struct service_hndl { @@ -60,6 +61,8 @@ int adf_dev_restart(struct adf_accel_dev *accel_dev); void adf_devmgr_update_class_index(struct adf_hw_device_data *hw_data); void adf_clean_vf_map(bool); +int adf_notify_fatal_error(struct adf_accel_dev *accel_dev); +void adf_error_notifier(struct adf_accel_dev *accel_dev); int adf_devmgr_add_dev(struct adf_accel_dev *accel_dev, struct adf_accel_dev *pf); void adf_devmgr_rm_dev(struct adf_accel_dev *accel_dev, @@ -84,12 +87,14 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev); extern const struct pci_error_handlers adf_err_handler; void adf_reset_sbr(struct adf_accel_dev *accel_dev); void adf_reset_flr(struct adf_accel_dev *accel_dev); +int adf_dev_autoreset(struct adf_accel_dev *accel_dev); void adf_dev_restore(struct 
adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); int adf_init_arb(struct adf_accel_dev *accel_dev); void adf_exit_arb(struct adf_accel_dev *accel_dev); void adf_update_ring_arb(struct adf_etr_ring_data *ring); +int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr); int adf_dev_get(struct adf_accel_dev *accel_dev); void adf_dev_put(struct adf_accel_dev *accel_dev); @@ -188,6 +193,7 @@ bool adf_misc_wq_queue_delayed_work(struct delayed_work *work, #if defined(CONFIG_PCI_IOV) int adf_sriov_configure(struct pci_dev *pdev, int numvfs); void adf_disable_sriov(struct adf_accel_dev *accel_dev); +void adf_reenable_sriov(struct adf_accel_dev *accel_dev); void adf_enable_vf2pf_interrupts(struct adf_accel_dev *accel_dev, u32 vf_mask); void adf_disable_all_vf2pf_interrupts(struct adf_accel_dev *accel_dev); bool adf_recv_and_handle_pf2vf_msg(struct adf_accel_dev *accel_dev); @@ -208,6 +214,10 @@ static inline void adf_disable_sriov(struct adf_accel_dev *accel_dev) { } +static inline void adf_reenable_sriov(struct adf_accel_dev *accel_dev) +{ +} + static inline int adf_init_pf_wq(void) { return 0; @@ -238,6 +248,16 @@ static inline void __iomem *adf_get_pmisc_base(struct adf_accel_dev *accel_dev) return pmisc->virt_addr; } +static inline void __iomem *adf_get_etr_base(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_bar *etr; + + etr = &GET_BARS(accel_dev)[hw_data->get_etr_bar_id(hw_data)]; + + return etr->virt_addr; +} + static inline void __iomem *adf_get_aram_base(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; diff --git a/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c b/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c index 86ee36feef..f07b748795 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c +++ b/drivers/crypto/intel/qat/qat_common/adf_dev_mgr.c @@ -60,10 +60,10 @@ static int adf_get_vf_real_id(u32 
fake) /** * adf_clean_vf_map() - Cleans VF id mapings - * - * Function cleans internal ids for virtual functions. * @vf: flag indicating whether mappings is cleaned * for vfs only or for vfs and pfs + * + * Function cleans internal ids for virtual functions. */ void adf_clean_vf_map(bool vf) { diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c new file mode 100644 index 0000000000..650c9edd8a --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include <linux/types.h> +#include "adf_gen2_hw_csr_data.h" + +static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) +{ + return BUILD_RING_BASE_ADDR(addr, size); +} + +static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); +} + +static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); +} + +static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_E_STAT(csr_base_addr, bank); +} + +static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, + u32 ring, u32 value) +{ + WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); +} + +static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, + dma_addr_t addr) +{ + WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); +} + +static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + 
WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); +} + +static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); +} + +static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); +} + +static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); +} + +static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); +} + +void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) +{ + csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; + csr_ops->read_csr_ring_head = read_csr_ring_head; + csr_ops->write_csr_ring_head = write_csr_ring_head; + csr_ops->read_csr_ring_tail = read_csr_ring_tail; + csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; +} +EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h new file mode 100644 index 0000000000..55058b0f9e --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_csr_data.h @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: 
GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN2_HW_CSR_DATA_H_ +#define ADF_GEN2_HW_CSR_DATA_H_ + +#include <linux/bitops.h> +#include "adf_accel_devices.h" + +#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL +#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL +#define ADF_RING_CSR_RING_CONFIG 0x000 +#define ADF_RING_CSR_RING_LBASE 0x040 +#define ADF_RING_CSR_RING_UBASE 0x080 +#define ADF_RING_CSR_RING_HEAD 0x0C0 +#define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_INT_FLAG 0x170 +#define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_SRCSEL_2 0x178 +#define ADF_RING_CSR_INT_COL_EN 0x17C +#define ADF_RING_CSR_INT_COL_CTL 0x180 +#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 +#define ADF_RING_BUNDLE_SIZE 0x1000 +#define ADF_ARB_REG_SLOT 0x1000 +#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C + +#define BUILD_RING_BASE_ADDR(addr, size) \ + (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) +#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2)) +#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_E_STAT(csr_base_addr, bank) \ + ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_E_STAT) +#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) +#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ +do { \ + u32 l_base = 0, u_base = 0; \ + l_base = (u32)((value) & 0xFFFFFFFF); \ + u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \ + 
ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \ +} while (0) + +#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) +#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_FLAG, value) +#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ +do { \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \ +} while (0) +#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_COL_EN, value) +#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_COL_CTL, \ + ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ + ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ + ADF_RING_CSR_INT_FLAG_AND_COL, value) + +#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \ + ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \ + (ADF_ARB_REG_SLOT * (index)), value) + +void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c index d1884547b5..1f64bf49b2 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c +++ 
b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.c @@ -111,103 +111,6 @@ void adf_gen2_enable_ints(struct adf_accel_dev *accel_dev) } EXPORT_SYMBOL_GPL(adf_gen2_enable_ints); -static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) -{ - return BUILD_RING_BASE_ADDR(addr, size); -} - -static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); -} - -static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); -} - -static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) -{ - return READ_CSR_E_STAT(csr_base_addr, bank); -} - -static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, - u32 ring, u32 value) -{ - WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); -} - -static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, - dma_addr_t addr) -{ - WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); -} - -static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, u32 value) -{ - WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); -} - -static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) -{ - WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); -} - -static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); -} - -static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); -} - -static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, - u32 
value) -{ - WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); -} - -static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); -} - -void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) -{ - csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; - csr_ops->read_csr_ring_head = read_csr_ring_head; - csr_ops->write_csr_ring_head = write_csr_ring_head; - csr_ops->read_csr_ring_tail = read_csr_ring_tail; - csr_ops->write_csr_ring_tail = write_csr_ring_tail; - csr_ops->read_csr_e_stat = read_csr_e_stat; - csr_ops->write_csr_ring_config = write_csr_ring_config; - csr_ops->write_csr_ring_base = write_csr_ring_base; - csr_ops->write_csr_int_flag = write_csr_int_flag; - csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; - csr_ops->write_csr_int_col_en = write_csr_int_col_en; - csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; - csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; - csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; -} -EXPORT_SYMBOL_GPL(adf_gen2_init_hw_csr_ops); - u32 adf_gen2_get_accel_cap(struct adf_accel_dev *accel_dev) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h index 6bd341061d..708e918612 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen2_hw_data.h @@ -6,78 +6,9 @@ #include "adf_accel_devices.h" #include "adf_cfg_common.h" -/* Transport access */ -#define ADF_BANK_INT_SRC_SEL_MASK_0 0x4444444CUL -#define ADF_BANK_INT_SRC_SEL_MASK_X 0x44444444UL -#define ADF_RING_CSR_RING_CONFIG 0x000 -#define ADF_RING_CSR_RING_LBASE 0x040 -#define ADF_RING_CSR_RING_UBASE 0x080 -#define ADF_RING_CSR_RING_HEAD 0x0C0 -#define ADF_RING_CSR_RING_TAIL 0x100 -#define ADF_RING_CSR_E_STAT 0x14C -#define ADF_RING_CSR_INT_FLAG 
0x170 -#define ADF_RING_CSR_INT_SRCSEL 0x174 -#define ADF_RING_CSR_INT_SRCSEL_2 0x178 -#define ADF_RING_CSR_INT_COL_EN 0x17C -#define ADF_RING_CSR_INT_COL_CTL 0x180 -#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 -#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 -#define ADF_RING_BUNDLE_SIZE 0x1000 #define ADF_GEN2_RX_RINGS_OFFSET 8 #define ADF_GEN2_TX_RINGS_MASK 0xFF -#define BUILD_RING_BASE_ADDR(addr, size) \ - (((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) -#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2)) -#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2)) -#define READ_CSR_E_STAT(csr_base_addr, bank) \ - ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_E_STAT) -#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) -#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ -do { \ - u32 l_base = 0, u_base = 0; \ - l_base = (u32)((value) & 0xFFFFFFFF); \ - u_base = (u32)(((value) & 0xFFFFFFFF00000000ULL) >> 32); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_LBASE + ((ring) << 2), l_base); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_UBASE + ((ring) << 2), u_base); \ -} while (0) - -#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) -#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) -#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ - 
ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_FLAG, value) -#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ -do { \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK_0); \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_SRCSEL_2, ADF_BANK_INT_SRC_SEL_MASK_X); \ -} while (0) -#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_COL_EN, value) -#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_COL_CTL, \ - ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) -#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ - ADF_CSR_WR(csr_base_addr, (ADF_RING_BUNDLE_SIZE * (bank)) + \ - ADF_RING_CSR_INT_FLAG_AND_COL, value) - /* AE to function map */ #define AE2FUNCTION_MAP_A_OFFSET (0x3A400 + 0x190) #define AE2FUNCTION_MAP_B_OFFSET (0x3A400 + 0x310) @@ -106,12 +37,6 @@ do { \ #define ADF_ARB_OFFSET 0x30000 #define ADF_ARB_WRK_2_SER_MAP_OFFSET 0x180 #define ADF_ARB_CONFIG (BIT(31) | BIT(6) | BIT(0)) -#define ADF_ARB_REG_SLOT 0x1000 -#define ADF_ARB_RINGSRVARBEN_OFFSET 0x19C - -#define WRITE_CSR_RING_SRV_ARB_EN(csr_addr, index, value) \ - ADF_CSR_WR(csr_addr, ADF_ARB_RINGSRVARBEN_OFFSET + \ - (ADF_ARB_REG_SLOT * (index)), value) /* Power gating */ #define ADF_POWERGATE_DC BIT(23) @@ -158,7 +83,6 @@ u32 adf_gen2_get_num_aes(struct adf_hw_device_data *self); void adf_gen2_enable_error_correction(struct adf_accel_dev *accel_dev); void adf_gen2_cfg_iov_thds(struct adf_accel_dev *accel_dev, bool enable, int num_a_regs, int num_b_regs); -void adf_gen2_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); void adf_gen2_get_admin_info(struct admin_info *admin_csrs_info); void adf_gen2_get_arb_info(struct arb_info *arb_info); void adf_gen2_enable_ints(struct adf_accel_dev 
*accel_dev); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c new file mode 100644 index 0000000000..6609c248aa --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include <linux/types.h> +#include "adf_gen4_hw_csr_data.h" + +static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) +{ + return BUILD_RING_BASE_ADDR(addr, size); +} + +static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); +} + +static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); +} + +static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); +} + +static u32 read_csr_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_STAT(csr_base_addr, bank); +} + +static u32 read_csr_uo_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_UO_STAT(csr_base_addr, bank); +} + +static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_E_STAT(csr_base_addr, bank); +} + +static u32 read_csr_ne_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_NE_STAT(csr_base_addr, bank); +} + +static u32 read_csr_nf_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_NF_STAT(csr_base_addr, bank); +} + +static u32 read_csr_f_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_F_STAT(csr_base_addr, bank); +} + +static u32 read_csr_c_stat(void __iomem *csr_base_addr, u32 bank) +{ + return 
READ_CSR_C_STAT(csr_base_addr, bank); +} + +static u32 read_csr_exp_stat(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_EXP_STAT(csr_base_addr, bank); +} + +static u32 read_csr_exp_int_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_EXP_INT_EN(csr_base_addr, bank); +} + +static void write_csr_exp_int_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_EXP_INT_EN(csr_base_addr, bank, value); +} + +static u32 read_csr_ring_config(void __iomem *csr_base_addr, u32 bank, + u32 ring) +{ + return READ_CSR_RING_CONFIG(csr_base_addr, bank, ring); +} + +static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring, + u32 value) +{ + WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); +} + +static dma_addr_t read_csr_ring_base(void __iomem *csr_base_addr, u32 bank, + u32 ring) +{ + return READ_CSR_RING_BASE(csr_base_addr, bank, ring); +} + +static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, + dma_addr_t addr) +{ + WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); +} + +static u32 read_csr_int_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_EN(csr_base_addr, bank); +} + +static void write_csr_int_en(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_EN(csr_base_addr, bank, value); +} + +static u32 read_csr_int_flag(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_FLAG(csr_base_addr, bank); +} + +static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); +} + +static u32 read_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) +{ + WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); +} + +static void write_csr_int_srcsel_w_val(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_SRCSEL_W_VAL(csr_base_addr, 
bank, value); +} + +static u32 read_csr_int_col_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_COL_EN(csr_base_addr, bank); +} + +static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value) +{ + WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); +} + +static u32 read_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_COL_CTL(csr_base_addr, bank); +} + +static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); +} + +static u32 read_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_INT_FLAG_AND_COL(csr_base_addr, bank); +} + +static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); +} + +static u32 read_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank) +{ + return READ_CSR_RING_SRV_ARB_EN(csr_base_addr, bank); +} + +static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, + u32 value) +{ + WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); +} + +static u32 get_int_col_ctl_enable_mask(void) +{ + return ADF_RING_CSR_INT_COL_CTL_ENABLE; +} + +void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) +{ + csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; + csr_ops->read_csr_ring_head = read_csr_ring_head; + csr_ops->write_csr_ring_head = write_csr_ring_head; + csr_ops->read_csr_ring_tail = read_csr_ring_tail; + csr_ops->write_csr_ring_tail = write_csr_ring_tail; + csr_ops->read_csr_stat = read_csr_stat; + csr_ops->read_csr_uo_stat = read_csr_uo_stat; + csr_ops->read_csr_e_stat = read_csr_e_stat; + csr_ops->read_csr_ne_stat = read_csr_ne_stat; + csr_ops->read_csr_nf_stat = read_csr_nf_stat; + csr_ops->read_csr_f_stat = read_csr_f_stat; + csr_ops->read_csr_c_stat = read_csr_c_stat; + csr_ops->read_csr_exp_stat = read_csr_exp_stat; + 
csr_ops->read_csr_exp_int_en = read_csr_exp_int_en; + csr_ops->write_csr_exp_int_en = write_csr_exp_int_en; + csr_ops->read_csr_ring_config = read_csr_ring_config; + csr_ops->write_csr_ring_config = write_csr_ring_config; + csr_ops->read_csr_ring_base = read_csr_ring_base; + csr_ops->write_csr_ring_base = write_csr_ring_base; + csr_ops->read_csr_int_en = read_csr_int_en; + csr_ops->write_csr_int_en = write_csr_int_en; + csr_ops->read_csr_int_flag = read_csr_int_flag; + csr_ops->write_csr_int_flag = write_csr_int_flag; + csr_ops->read_csr_int_srcsel = read_csr_int_srcsel; + csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; + csr_ops->write_csr_int_srcsel_w_val = write_csr_int_srcsel_w_val; + csr_ops->read_csr_int_col_en = read_csr_int_col_en; + csr_ops->write_csr_int_col_en = write_csr_int_col_en; + csr_ops->read_csr_int_col_ctl = read_csr_int_col_ctl; + csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; + csr_ops->read_csr_int_flag_and_col = read_csr_int_flag_and_col; + csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; + csr_ops->read_csr_ring_srv_arb_en = read_csr_ring_srv_arb_en; + csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; + csr_ops->get_int_col_ctl_enable_mask = get_int_col_ctl_enable_mask; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h new file mode 100644 index 0000000000..6f33e7c87c --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_csr_data.h @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN4_HW_CSR_DATA_H_ +#define ADF_GEN4_HW_CSR_DATA_H_ + +#include <linux/bitops.h> +#include "adf_accel_devices.h" + +#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL +#define ADF_RING_CSR_RING_CONFIG 0x1000 +#define ADF_RING_CSR_RING_LBASE 0x1040 +#define ADF_RING_CSR_RING_UBASE 0x1080 +#define 
ADF_RING_CSR_RING_HEAD 0x0C0 +#define ADF_RING_CSR_RING_TAIL 0x100 +#define ADF_RING_CSR_STAT 0x140 +#define ADF_RING_CSR_UO_STAT 0x148 +#define ADF_RING_CSR_E_STAT 0x14C +#define ADF_RING_CSR_NE_STAT 0x150 +#define ADF_RING_CSR_NF_STAT 0x154 +#define ADF_RING_CSR_F_STAT 0x158 +#define ADF_RING_CSR_C_STAT 0x15C +#define ADF_RING_CSR_INT_FLAG_EN 0x16C +#define ADF_RING_CSR_INT_FLAG 0x170 +#define ADF_RING_CSR_INT_SRCSEL 0x174 +#define ADF_RING_CSR_INT_COL_EN 0x17C +#define ADF_RING_CSR_INT_COL_CTL 0x180 +#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 +#define ADF_RING_CSR_EXP_STAT 0x188 +#define ADF_RING_CSR_EXP_INT_EN 0x18C +#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 +#define ADF_RING_CSR_ADDR_OFFSET 0x100000 +#define ADF_RING_BUNDLE_SIZE 0x2000 +#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C + +#define BUILD_RING_BASE_ADDR(addr, size) \ + ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6) +#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2)) +#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2)) +#define READ_CSR_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_STAT) +#define READ_CSR_UO_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_UO_STAT) +#define READ_CSR_E_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT) +#define READ_CSR_NE_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_NE_STAT) +#define READ_CSR_NF_STAT(csr_base_addr, bank) \ + 
ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_NF_STAT) +#define READ_CSR_F_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_F_STAT) +#define READ_CSR_C_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_C_STAT) +#define READ_CSR_EXP_STAT(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_EXP_STAT) +#define READ_CSR_EXP_INT_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_EXP_INT_EN) +#define WRITE_CSR_EXP_INT_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_EXP_INT_EN, value) +#define READ_CSR_RING_CONFIG(csr_base_addr, bank, ring) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2)) +#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) +#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ +do { \ + void __iomem *_csr_base_addr = csr_base_addr; \ + u32 _bank = bank; \ + u32 _ring = ring; \ + dma_addr_t _value = value; \ + u32 l_base = 0, u_base = 0; \ + l_base = lower_32_bits(_value); \ + u_base = upper_32_bits(_value); \ + ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (_bank) + \ + ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \ + ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (_bank) + \ + ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \ +} while (0) + 
+static inline u64 read_base(void __iomem *csr_base_addr, u32 bank, u32 ring) +{ + u32 l_base, u_base; + + /* + * Use special IO wrapper for ring base as LBASE and UBASE are + * not physically contigious + */ + l_base = ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + + ADF_RING_CSR_RING_LBASE + (ring << 2)); + u_base = ADF_CSR_RD(csr_base_addr, (ADF_RING_BUNDLE_SIZE * bank) + + ADF_RING_CSR_RING_UBASE + (ring << 2)); + + return (u64)u_base << 32 | (u64)l_base; +} + +#define READ_CSR_RING_BASE(csr_base_addr, bank, ring) \ + read_base((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, (bank), (ring)) + +#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) +#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) +#define READ_CSR_INT_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_FLAG_EN) +#define WRITE_CSR_INT_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_EN, (value)) +#define READ_CSR_INT_FLAG(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_FLAG) +#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG, (value)) +#define READ_CSR_INT_SRCSEL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_SRCSEL) +#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ + ADF_CSR_WR((csr_base_addr) + 
ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK) +#define WRITE_CSR_INT_SRCSEL_W_VAL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_SRCSEL, (value)) +#define READ_CSR_INT_COL_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_COL_EN) +#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_COL_EN, (value)) +#define READ_CSR_INT_COL_CTL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_INT_COL_CTL) +#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_COL_CTL, \ + ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) +#define READ_CSR_INT_FLAG_AND_COL(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_AND_COL) +#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_INT_FLAG_AND_COL, (value)) + +#define READ_CSR_RING_SRV_ARB_EN(csr_base_addr, bank) \ + ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_SRV_ARB_EN) +#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \ + ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ + ADF_RING_BUNDLE_SIZE * (bank) + \ + ADF_RING_CSR_RING_SRV_ARB_EN, (value)) + +void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c 
b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c index f752653ccb..41a0979e68 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.c @@ -1,109 +1,14 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2020 Intel Corporation */ #include <linux/iopoll.h> +#include <asm/div64.h> #include "adf_accel_devices.h" #include "adf_cfg_services.h" #include "adf_common_drv.h" +#include "adf_fw_config.h" #include "adf_gen4_hw_data.h" #include "adf_gen4_pm.h" -static u64 build_csr_ring_base_addr(dma_addr_t addr, u32 size) -{ - return BUILD_RING_BASE_ADDR(addr, size); -} - -static u32 read_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_HEAD(csr_base_addr, bank, ring); -} - -static void write_csr_ring_head(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring) -{ - return READ_CSR_RING_TAIL(csr_base_addr, bank, ring); -} - -static void write_csr_ring_tail(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value); -} - -static u32 read_csr_e_stat(void __iomem *csr_base_addr, u32 bank) -{ - return READ_CSR_E_STAT(csr_base_addr, bank); -} - -static void write_csr_ring_config(void __iomem *csr_base_addr, u32 bank, u32 ring, - u32 value) -{ - WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value); -} - -static void write_csr_ring_base(void __iomem *csr_base_addr, u32 bank, u32 ring, - dma_addr_t addr) -{ - WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, addr); -} - -static void write_csr_int_flag(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_FLAG(csr_base_addr, bank, value); -} - -static void write_csr_int_srcsel(void __iomem *csr_base_addr, u32 bank) -{ - WRITE_CSR_INT_SRCSEL(csr_base_addr, bank); -} - 
-static void write_csr_int_col_en(void __iomem *csr_base_addr, u32 bank, u32 value) -{ - WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value); -} - -static void write_csr_int_col_ctl(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value); -} - -static void write_csr_int_flag_and_col(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value); -} - -static void write_csr_ring_srv_arb_en(void __iomem *csr_base_addr, u32 bank, - u32 value) -{ - WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value); -} - -void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops) -{ - csr_ops->build_csr_ring_base_addr = build_csr_ring_base_addr; - csr_ops->read_csr_ring_head = read_csr_ring_head; - csr_ops->write_csr_ring_head = write_csr_ring_head; - csr_ops->read_csr_ring_tail = read_csr_ring_tail; - csr_ops->write_csr_ring_tail = write_csr_ring_tail; - csr_ops->read_csr_e_stat = read_csr_e_stat; - csr_ops->write_csr_ring_config = write_csr_ring_config; - csr_ops->write_csr_ring_base = write_csr_ring_base; - csr_ops->write_csr_int_flag = write_csr_int_flag; - csr_ops->write_csr_int_srcsel = write_csr_int_srcsel; - csr_ops->write_csr_int_col_en = write_csr_int_col_en; - csr_ops->write_csr_int_col_ctl = write_csr_int_col_ctl; - csr_ops->write_csr_int_flag_and_col = write_csr_int_flag_and_col; - csr_ops->write_csr_ring_srv_arb_en = write_csr_ring_srv_arb_en; -} -EXPORT_SYMBOL_GPL(adf_gen4_init_hw_csr_ops); - u32 adf_gen4_get_accel_mask(struct adf_hw_device_data *self) { return ADF_GEN4_ACCELERATORS_MASK; @@ -320,8 +225,7 @@ static int reset_ring_pair(void __iomem *csr, u32 bank_number) int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number) { struct adf_hw_device_data *hw_data = accel_dev->hw_device; - u32 etr_bar_id = hw_data->get_etr_bar_id(hw_data); - void __iomem *csr; + void __iomem *csr = adf_get_etr_base(accel_dev); int ret; if (bank_number >= 
hw_data->num_banks) @@ -330,7 +234,6 @@ int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number) dev_dbg(&GET_DEV(accel_dev), "ring pair reset for bank:%d\n", bank_number); - csr = (&GET_BARS(accel_dev)[etr_bar_id])->virt_addr; ret = reset_ring_pair(csr, bank_number); if (ret) dev_err(&GET_DEV(accel_dev), @@ -433,3 +336,336 @@ int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev) return 0; } EXPORT_SYMBOL_GPL(adf_gen4_init_thd2arb_map); + +u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev); + enum adf_cfg_service_type rps[RP_GROUP_COUNT] = { }; + unsigned int ae_mask, start_id, worker_obj_cnt, i; + u16 ring_to_svc_map; + int rp_group; + + if (!hw_data->get_rp_group || !hw_data->uof_get_ae_mask || + !hw_data->uof_get_obj_type || !hw_data->uof_get_num_objs) + return 0; + + /* If dcc, all rings handle compression requests */ + if (adf_get_service_enabled(accel_dev) == SVC_DCC) { + for (i = 0; i < RP_GROUP_COUNT; i++) + rps[i] = COMP; + goto set_mask; + } + + worker_obj_cnt = hw_data->uof_get_num_objs(accel_dev) - + ADF_GEN4_ADMIN_ACCELENGINES; + start_id = worker_obj_cnt - RP_GROUP_COUNT; + + for (i = start_id; i < worker_obj_cnt; i++) { + ae_mask = hw_data->uof_get_ae_mask(accel_dev, i); + rp_group = hw_data->get_rp_group(accel_dev, ae_mask); + if (rp_group >= RP_GROUP_COUNT || rp_group < RP_GROUP_0) + return 0; + + switch (hw_data->uof_get_obj_type(accel_dev, i)) { + case ADF_FW_SYM_OBJ: + rps[rp_group] = SYM; + break; + case ADF_FW_ASYM_OBJ: + rps[rp_group] = ASYM; + break; + case ADF_FW_DC_OBJ: + rps[rp_group] = COMP; + break; + default: + rps[rp_group] = 0; + break; + } + } + +set_mask: + ring_to_svc_map = rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_0_SHIFT | + rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_1_SHIFT | + rps[RP_GROUP_0] << ADF_CFG_SERV_RING_PAIR_2_SHIFT | + rps[RP_GROUP_1] << ADF_CFG_SERV_RING_PAIR_3_SHIFT; + + return ring_to_svc_map; +} 
+EXPORT_SYMBOL_GPL(adf_gen4_get_ring_to_svc_map);
+
+/*
+ * adf_gen4_bank_quiesce_coal_timer() - quiesce bank coalesced interrupt timer
+ * @accel_dev: Pointer to the device structure
+ * @bank_idx: Offset to the bank within this device
+ * @timeout_ms: Upper bound, in milliseconds, on how long to wait
+ *
+ * Nothing is done (and 0 is returned) when the enable bit reported by
+ * get_int_col_ctl_enable_mask() is clear in the bank's coalescing control
+ * register, or when the RX ring bit (ADF_WQM_CSR_RP_IDX_RX) is clear in the
+ * coalescing enable register or set in e_stat.
+ *
+ * Otherwise the remaining int_col_ctl bits are converted to a wait time in
+ * microseconds using the device clock frequency, capped at @timeout_ms, and
+ * the bank's RPINTSOU register is polled until it reads non-zero.
+ *
+ * Returns 0 on success, -EINVAL if @timeout_ms is negative or the device
+ * clock frequency is zero, or the error returned by read_poll_timeout() if
+ * the register is still zero when the wait time elapses.
+ */
+int adf_gen4_bank_quiesce_coal_timer(struct adf_accel_dev *accel_dev,
+				     u32 bank_idx, int timeout_ms)
+{
+	struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+	struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev);
+	void __iomem *csr_misc = adf_get_pmisc_base(accel_dev);
+	void __iomem *csr_etr = adf_get_etr_base(accel_dev);
+	u32 int_col_ctl, int_col_mask, int_col_en;
+	u32 e_stat, intsrc;
+	u64 wait_us;
+	int ret;
+
+	if (timeout_ms < 0)
+		return -EINVAL;
+
+	int_col_ctl = csr_ops->read_csr_int_col_ctl(csr_etr, bank_idx);
+	int_col_mask = csr_ops->get_int_col_ctl_enable_mask();
+	if (!(int_col_ctl & int_col_mask))
+		return 0;
+
+	int_col_en = csr_ops->read_csr_int_col_en(csr_etr, bank_idx);
+	int_col_en &= BIT(ADF_WQM_CSR_RP_IDX_RX);
+
+	e_stat = csr_ops->read_csr_e_stat(csr_etr, bank_idx);
+	if (!(~e_stat & int_col_en))
+		return 0;
+
+	/* do_div() below must not be handed a zero divisor */
+	if (!hw_data->clock_frequency)
+		return -EINVAL;
+
+	/*
+	 * Widen before shifting and scaling so the intermediate product cannot
+	 * wrap on builds where USEC_PER_SEC is a 32-bit long.
+	 */
+	wait_us = 2 * ((u64)(int_col_ctl & ~int_col_mask) << 8) * USEC_PER_SEC;
+	do_div(wait_us, hw_data->clock_frequency);
+	wait_us = min(wait_us, (u64)timeout_ms * USEC_PER_MSEC);
+	dev_dbg(&GET_DEV(accel_dev),
+		"wait for bank %d - coalesced timer expires in %llu us (max=%u ms estat=0x%x intcolen=0x%x)\n",
+		bank_idx, wait_us, timeout_ms, e_stat, int_col_en);
+
+	/* Poll the interrupt source register until it becomes non-zero */
+	ret = read_poll_timeout(ADF_CSR_RD, intsrc, intsrc,
+				ADF_COALESCED_POLL_DELAY_US, wait_us, true,
+				csr_misc, ADF_WQM_CSR_RPINTSOU(bank_idx));
+	if (ret)
+		dev_warn(&GET_DEV(accel_dev),
+			 "coalesced timer for bank %d expired (%llu us)\n",
+			 bank_idx, wait_us);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_bank_quiesce_coal_timer);
+
+static int drain_bank(void __iomem *csr, u32 bank_number, int timeout_us)
+{
+	u32 status;
+
+	ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETCTL(bank_number),
+		   ADF_WQM_CSR_RPRESETCTL_DRAIN);
+
+	return read_poll_timeout(ADF_CSR_RD, status,
+				 status & ADF_WQM_CSR_RPRESETSTS_STATUS,
+				 ADF_RPRESET_POLL_DELAY_US, timeout_us, true,
+				 csr, ADF_WQM_CSR_RPRESETSTS(bank_number));
+}
+
+void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev,
+				u32 bank_number)
+{
+	void __iomem *csr = adf_get_etr_base(accel_dev);
+
+	ADF_CSR_WR(csr, ADF_WQM_CSR_RPRESETSTS(bank_number),
+		   ADF_WQM_CSR_RPRESETSTS_STATUS);
+}
+
+int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev,
+			      u32 bank_number, int timeout_us)
+{
+	void __iomem *csr = adf_get_etr_base(accel_dev);
+	int ret;
+
+	dev_dbg(&GET_DEV(accel_dev), "Drain bank %d\n", bank_number);
+
+	ret = drain_bank(csr, bank_number, timeout_us);
+	if (ret)
+		dev_err(&GET_DEV(accel_dev), "Bank drain failed (timeout)\n");
+	else
+		dev_dbg(&GET_DEV(accel_dev), "Bank drain successful\n");
+
+	return ret;
+}
+
+static void bank_state_save(struct adf_hw_csr_ops *ops, void __iomem *base,
+			    u32 bank, struct bank_state *state, u32 num_rings)
+{
+	u32 i;
+
+	state->ringstat0 = ops->read_csr_stat(base, bank);
+	state->ringuostat = ops->read_csr_uo_stat(base, bank);
+	state->ringestat = ops->read_csr_e_stat(base, bank);
+	state->ringnestat = ops->read_csr_ne_stat(base, bank);
+	state->ringnfstat = ops->read_csr_nf_stat(base, bank);
+	state->ringfstat = ops->read_csr_f_stat(base, bank);
+	state->ringcstat0 = ops->read_csr_c_stat(base, bank);
+	state->iaintflagen = ops->read_csr_int_en(base, bank);
+	state->iaintflagreg = ops->read_csr_int_flag(base, bank);
+	state->iaintflagsrcsel0 = ops->read_csr_int_srcsel(base, bank);
+	state->iaintcolen = ops->read_csr_int_col_en(base, bank);
+	state->iaintcolctl = ops->read_csr_int_col_ctl(base, bank);
+	state->iaintflagandcolen = ops->read_csr_int_flag_and_col(base, bank);
+	state->ringexpstat = ops->read_csr_exp_stat(base, bank);
+	state->ringexpintenable = ops->read_csr_exp_int_en(base, bank);
+	state->ringsrvarben = ops->read_csr_ring_srv_arb_en(base, bank);
+
+	for (i = 0; i < num_rings; i++) {
+		state->rings[i].head = ops->read_csr_ring_head(base, bank, i);
+		state->rings[i].tail = ops->read_csr_ring_tail(base, bank, i);
+		state->rings[i].config = ops->read_csr_ring_config(base, bank, i);
+		state->rings[i].base = ops->read_csr_ring_base(base, bank, i);
+	}
+}
+
+#define CHECK_STAT(op, expect_val, name, args...) \
+({ \
+	u32 __expect_val = (expect_val); \
+	u32 actual_val = op(args); \
+	(__expect_val == actual_val) ? 0 : \
+		(pr_err("QAT: Fail to restore %s register. Expected 0x%x, actual 0x%x\n", \
+			name, __expect_val, actual_val), -EINVAL); \
+})
+
+/*
+ * Write a previously saved bank_state back to the ring CSRs of @bank and
+ * verify that the ring status registers read back as saved.
+ *
+ * Ring base and config are written first for every ring; head and tail are
+ * then written per TX/RX pair, where the RX ring index is the TX ring index
+ * plus @tx_rx_gap. The interrupt and arbiter registers are written last.
+ *
+ * Returns 0 on success, -EINVAL if a status register does not match the
+ * saved value or if the exception status register is non-zero after the
+ * restore while the saved value was zero.
+ */
+static int bank_state_restore(struct adf_hw_csr_ops *ops, void __iomem *base,
+			      u32 bank, struct bank_state *state, u32 num_rings,
+			      int tx_rx_gap)
+{
+	u32 val, tmp_val, i;
+	int ret;
+
+	for (i = 0; i < num_rings; i++)
+		ops->write_csr_ring_base(base, bank, i, state->rings[i].base);
+
+	for (i = 0; i < num_rings; i++)
+		ops->write_csr_ring_config(base, bank, i, state->rings[i].config);
+
+	for (i = 0; i < num_rings / 2; i++) {
+		int tx = i * (tx_rx_gap + 1);
+		int rx = tx + tx_rx_gap;
+
+		ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head);
+		ops->write_csr_ring_tail(base, bank, tx, state->rings[tx].tail);
+
+		/*
+		 * The TX ring head needs to be updated again to make sure that
+		 * the HW will not consider the ring as full when it is empty
+		 * and the correct state flags are set to match the recovered state.
+		 */
+		if (state->ringestat & BIT(tx)) {
+			val = ops->read_csr_int_srcsel(base, bank);
+			val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK;
+			ops->write_csr_int_srcsel_w_val(base, bank, val);
+			ops->write_csr_ring_head(base, bank, tx, state->rings[tx].head);
+		}
+
+		ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail);
+		val = ops->read_csr_int_srcsel(base, bank);
+		val |= ADF_RP_INT_SRC_SEL_F_RISE_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH;
+		ops->write_csr_int_srcsel_w_val(base, bank, val);
+
+		ops->write_csr_ring_head(base, bank, rx, state->rings[rx].head);
+		val = ops->read_csr_int_srcsel(base, bank);
+		val |= ADF_RP_INT_SRC_SEL_F_FALL_MASK << ADF_RP_INT_SRC_SEL_RANGE_WIDTH;
+		ops->write_csr_int_srcsel_w_val(base, bank, val);
+
+		/*
+		 * The RX ring tail needs to be updated again to make sure that
+		 * the HW will not consider the ring as empty when it is full
+		 * and the correct state flags are set to match the recovered state.
+		 */
+		if (state->ringfstat & BIT(rx))
+			ops->write_csr_ring_tail(base, bank, rx, state->rings[rx].tail);
+	}
+
+	ops->write_csr_int_flag_and_col(base, bank, state->iaintflagandcolen);
+	ops->write_csr_int_en(base, bank, state->iaintflagen);
+	ops->write_csr_int_col_en(base, bank, state->iaintcolen);
+	ops->write_csr_int_srcsel_w_val(base, bank, state->iaintflagsrcsel0);
+	ops->write_csr_exp_int_en(base, bank, state->ringexpintenable);
+	ops->write_csr_int_col_ctl(base, bank, state->iaintcolctl);
+	ops->write_csr_ring_srv_arb_en(base, bank, state->ringsrvarben);
+
+	/* Check that all ring statuses match the saved state. */
+	ret = CHECK_STAT(ops->read_csr_stat, state->ringstat0, "ringstat",
+			 base, bank);
+	if (ret)
+		return ret;
+
+	ret = CHECK_STAT(ops->read_csr_e_stat, state->ringestat, "ringestat",
+			 base, bank);
+	if (ret)
+		return ret;
+
+	ret = CHECK_STAT(ops->read_csr_ne_stat, state->ringnestat, "ringnestat",
+			 base, bank);
+	if (ret)
+		return ret;
+
+	ret = CHECK_STAT(ops->read_csr_nf_stat, state->ringnfstat, "ringnfstat",
+			 base, bank);
+	if (ret)
+		return ret;
+
+	ret = CHECK_STAT(ops->read_csr_f_stat, state->ringfstat, "ringfstat",
+			 base, bank);
+	if (ret)
+		return ret;
+
+	ret = CHECK_STAT(ops->read_csr_c_stat, state->ringcstat0, "ringcstat",
+			 base, bank);
+	if (ret)
+		return ret;
+
+	tmp_val = ops->read_csr_exp_stat(base, bank);
+	val = state->ringexpstat;
+	if (tmp_val && !val) {
+		/*
+		 * Report the value just read back: the saved value is zero
+		 * whenever this branch is taken, so logging it says nothing.
+		 */
+		pr_err("QAT: Bank was restored with exception: 0x%x\n", tmp_val);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number,
+			     struct bank_state *state)
+{
+	struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+	struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev);
+	void __iomem *csr_base = adf_get_etr_base(accel_dev);
+
+	if (bank_number >= hw_data->num_banks || !state)
+		return -EINVAL;
+
+	dev_dbg(&GET_DEV(accel_dev), "Saving state of bank %d\n", bank_number);
+
+	bank_state_save(csr_ops, csr_base, bank_number, state,
+			hw_data->num_rings_per_bank);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_bank_state_save);
+
+int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, u32 bank_number,
+				struct bank_state *state)
+{
+	struct adf_hw_device_data *hw_data = GET_HW_DATA(accel_dev);
+	struct adf_hw_csr_ops *csr_ops = GET_CSR_OPS(accel_dev);
+	void __iomem *csr_base = adf_get_etr_base(accel_dev);
+	int ret;
+
+	if (bank_number >= hw_data->num_banks || !state)
+		return -EINVAL;
+
+	dev_dbg(&GET_DEV(accel_dev), "Restoring state of bank %d\n", bank_number);
+
+	ret = bank_state_restore(csr_ops, csr_base,
bank_number, state, + hw_data->num_rings_per_bank, hw_data->tx_rx_gap); + if (ret) + dev_err(&GET_DEV(accel_dev), + "Unable to restore state of bank %d\n", bank_number); + + return ret; +} +EXPORT_SYMBOL_GPL(adf_gen4_bank_state_restore); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h index 7d8a774cad..8b10926ced 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_hw_data.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) */ /* Copyright(c) 2020 Intel Corporation */ -#ifndef ADF_GEN4_HW_CSR_DATA_H_ -#define ADF_GEN4_HW_CSR_DATA_H_ +#ifndef ADF_GEN4_HW_DATA_H_ +#define ADF_GEN4_HW_DATA_H_ #include <linux/units.h> @@ -54,95 +54,6 @@ #define ADF_GEN4_ADMINMSGLR_OFFSET 0x500578 #define ADF_GEN4_MAILBOX_BASE_OFFSET 0x600970 -/* Transport access */ -#define ADF_BANK_INT_SRC_SEL_MASK 0x44UL -#define ADF_RING_CSR_RING_CONFIG 0x1000 -#define ADF_RING_CSR_RING_LBASE 0x1040 -#define ADF_RING_CSR_RING_UBASE 0x1080 -#define ADF_RING_CSR_RING_HEAD 0x0C0 -#define ADF_RING_CSR_RING_TAIL 0x100 -#define ADF_RING_CSR_E_STAT 0x14C -#define ADF_RING_CSR_INT_FLAG 0x170 -#define ADF_RING_CSR_INT_SRCSEL 0x174 -#define ADF_RING_CSR_INT_COL_CTL 0x180 -#define ADF_RING_CSR_INT_FLAG_AND_COL 0x184 -#define ADF_RING_CSR_INT_COL_CTL_ENABLE 0x80000000 -#define ADF_RING_CSR_INT_COL_EN 0x17C -#define ADF_RING_CSR_ADDR_OFFSET 0x100000 -#define ADF_RING_BUNDLE_SIZE 0x2000 - -#define BUILD_RING_BASE_ADDR(addr, size) \ - ((((addr) >> 6) & (GENMASK_ULL(63, 0) << (size))) << 6) -#define READ_CSR_RING_HEAD(csr_base_addr, bank, ring) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2)) -#define READ_CSR_RING_TAIL(csr_base_addr, bank, ring) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - 
ADF_RING_CSR_RING_TAIL + ((ring) << 2)) -#define READ_CSR_E_STAT(csr_base_addr, bank) \ - ADF_CSR_RD((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + ADF_RING_CSR_E_STAT) -#define WRITE_CSR_RING_CONFIG(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_CONFIG + ((ring) << 2), value) -#define WRITE_CSR_RING_BASE(csr_base_addr, bank, ring, value) \ -do { \ - void __iomem *_csr_base_addr = csr_base_addr; \ - u32 _bank = bank; \ - u32 _ring = ring; \ - dma_addr_t _value = value; \ - u32 l_base = 0, u_base = 0; \ - l_base = lower_32_bits(_value); \ - u_base = upper_32_bits(_value); \ - ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (_bank) + \ - ADF_RING_CSR_RING_LBASE + ((_ring) << 2), l_base); \ - ADF_CSR_WR((_csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (_bank) + \ - ADF_RING_CSR_RING_UBASE + ((_ring) << 2), u_base); \ -} while (0) - -#define WRITE_CSR_RING_HEAD(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_HEAD + ((ring) << 2), value) -#define WRITE_CSR_RING_TAIL(csr_base_addr, bank, ring, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_TAIL + ((ring) << 2), value) -#define WRITE_CSR_INT_FLAG(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_FLAG, (value)) -#define WRITE_CSR_INT_SRCSEL(csr_base_addr, bank) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_SRCSEL, ADF_BANK_INT_SRC_SEL_MASK) -#define WRITE_CSR_INT_COL_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - 
ADF_RING_CSR_INT_COL_EN, (value)) -#define WRITE_CSR_INT_COL_CTL(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_COL_CTL, \ - ADF_RING_CSR_INT_COL_CTL_ENABLE | (value)) -#define WRITE_CSR_INT_FLAG_AND_COL(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_INT_FLAG_AND_COL, (value)) - -/* Arbiter configuration */ -#define ADF_RING_CSR_RING_SRV_ARB_EN 0x19C - -#define WRITE_CSR_RING_SRV_ARB_EN(csr_base_addr, bank, value) \ - ADF_CSR_WR((csr_base_addr) + ADF_RING_CSR_ADDR_OFFSET, \ - ADF_RING_BUNDLE_SIZE * (bank) + \ - ADF_RING_CSR_RING_SRV_ARB_EN, (value)) - /* Default ring mapping */ #define ADF_GEN4_DEFAULT_RING_TO_SRV_MAP \ (ASYM << ADF_CFG_SERV_RING_PAIR_0_SHIFT | \ @@ -166,10 +77,20 @@ do { \ #define ADF_RPRESET_POLL_TIMEOUT_US (5 * USEC_PER_SEC) #define ADF_RPRESET_POLL_DELAY_US 20 #define ADF_WQM_CSR_RPRESETCTL_RESET BIT(0) +#define ADF_WQM_CSR_RPRESETCTL_DRAIN BIT(2) #define ADF_WQM_CSR_RPRESETCTL(bank) (0x6000 + ((bank) << 3)) #define ADF_WQM_CSR_RPRESETSTS_STATUS BIT(0) #define ADF_WQM_CSR_RPRESETSTS(bank) (ADF_WQM_CSR_RPRESETCTL(bank) + 4) +/* Ring interrupt */ +#define ADF_RP_INT_SRC_SEL_F_RISE_MASK BIT(2) +#define ADF_RP_INT_SRC_SEL_F_FALL_MASK GENMASK(2, 0) +#define ADF_RP_INT_SRC_SEL_RANGE_WIDTH 4 +#define ADF_COALESCED_POLL_TIMEOUT_US (1 * USEC_PER_SEC) +#define ADF_COALESCED_POLL_DELAY_US 1000 +#define ADF_WQM_CSR_RPINTSOU(bank) (0x200000 + ((bank) << 12)) +#define ADF_WQM_CSR_RP_IDX_RX 1 + /* Error source registers */ #define ADF_GEN4_ERRSOU0 (0x41A200) #define ADF_GEN4_ERRSOU1 (0x41A204) @@ -197,6 +118,19 @@ do { \ /* Arbiter threads mask with error value */ #define ADF_GEN4_ENA_THD_MASK_ERROR GENMASK(ADF_NUM_THREADS_PER_AE, 0) +/* PF2VM communication channel */ +#define ADF_GEN4_PF2VM_OFFSET(i) (0x40B010 + (i) * 0x20) +#define ADF_GEN4_VM2PF_OFFSET(i) (0x40B014 + 
(i) * 0x20) +#define ADF_GEN4_VINTMSKPF2VM_OFFSET(i) (0x40B00C + (i) * 0x20) +#define ADF_GEN4_VINTSOUPF2VM_OFFSET(i) (0x40B008 + (i) * 0x20) +#define ADF_GEN4_VINTMSK_OFFSET(i) (0x40B004 + (i) * 0x20) +#define ADF_GEN4_VINTSOU_OFFSET(i) (0x40B000 + (i) * 0x20) + +struct adf_gen4_vfmig { + struct adf_mstate_mgr *mstate_mgr; + bool bank_stopped[ADF_GEN4_NUM_BANKS_PER_VF]; +}; + void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); enum icp_qat_gen4_slice_mask { @@ -230,10 +164,20 @@ u32 adf_gen4_get_num_aes(struct adf_hw_device_data *self); enum dev_sku_info adf_gen4_get_sku(struct adf_hw_device_data *self); u32 adf_gen4_get_sram_bar_id(struct adf_hw_device_data *self); int adf_gen4_init_device(struct adf_accel_dev *accel_dev); -void adf_gen4_init_hw_csr_ops(struct adf_hw_csr_ops *csr_ops); int adf_gen4_ring_pair_reset(struct adf_accel_dev *accel_dev, u32 bank_number); void adf_gen4_set_msix_default_rttable(struct adf_accel_dev *accel_dev); void adf_gen4_set_ssm_wdtimer(struct adf_accel_dev *accel_dev); int adf_gen4_init_thd2arb_map(struct adf_accel_dev *accel_dev); +u16 adf_gen4_get_ring_to_svc_map(struct adf_accel_dev *accel_dev); +int adf_gen4_bank_quiesce_coal_timer(struct adf_accel_dev *accel_dev, + u32 bank_idx, int timeout_ms); +int adf_gen4_bank_drain_start(struct adf_accel_dev *accel_dev, + u32 bank_number, int timeout_us); +void adf_gen4_bank_drain_finish(struct adf_accel_dev *accel_dev, + u32 bank_number); +int adf_gen4_bank_state_save(struct adf_accel_dev *accel_dev, u32 bank_number, + struct bank_state *state); +int adf_gen4_bank_state_restore(struct adf_accel_dev *accel_dev, + u32 bank_number, struct bank_state *state); #endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c index 8e8efe93f3..21474d402d 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_pfvf.c @@ -6,12 +6,10 @@ #include "adf_accel_devices.h" 
#include "adf_common_drv.h" #include "adf_gen4_pfvf.h" +#include "adf_gen4_hw_data.h" #include "adf_pfvf_pf_proto.h" #include "adf_pfvf_utils.h" -#define ADF_4XXX_PF2VM_OFFSET(i) (0x40B010 + ((i) * 0x20)) -#define ADF_4XXX_VM2PF_OFFSET(i) (0x40B014 + ((i) * 0x20)) - /* VF2PF interrupt source registers */ #define ADF_4XXX_VM2PF_SOU 0x41A180 #define ADF_4XXX_VM2PF_MSK 0x41A1C0 @@ -29,12 +27,12 @@ static const struct pfvf_csr_format csr_gen4_fmt = { static u32 adf_gen4_pf_get_pf2vf_offset(u32 i) { - return ADF_4XXX_PF2VM_OFFSET(i); + return ADF_GEN4_PF2VM_OFFSET(i); } static u32 adf_gen4_pf_get_vf2pf_offset(u32 i) { - return ADF_4XXX_VM2PF_OFFSET(i); + return ADF_GEN4_VM2PF_OFFSET(i); } static void adf_gen4_enable_vf2pf_interrupts(void __iomem *pmisc_addr, u32 vf_mask) diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c new file mode 100644 index 0000000000..a62eb5e8db --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.c @@ -0,0 +1,1010 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include <linux/delay.h> +#include <linux/dev_printk.h> +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/types.h> +#include <asm/errno.h> + +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "adf_gen4_hw_data.h" +#include "adf_gen4_pfvf.h" +#include "adf_pfvf_utils.h" +#include "adf_mstate_mgr.h" +#include "adf_gen4_vf_mig.h" + +#define ADF_GEN4_VF_MSTATE_SIZE 4096 +#define ADF_GEN4_PFVF_RSP_TIMEOUT_US 5000 + +static int adf_gen4_vfmig_save_setup(struct qat_mig_dev *mdev); +static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len); + +static int adf_gen4_vfmig_init_device(struct qat_mig_dev *mdev) +{ + u8 *state; + + state = kmalloc(ADF_GEN4_VF_MSTATE_SIZE, GFP_KERNEL); + if (!state) + return -ENOMEM; + + mdev->state = state; + mdev->state_size = ADF_GEN4_VF_MSTATE_SIZE; + 
mdev->setup_size = 0;
+	mdev->remote_setup_size = 0;
+
+	return 0;
+}
+
+static void adf_gen4_vfmig_cleanup_device(struct qat_mig_dev *mdev)
+{
+	kfree(mdev->state);
+	mdev->state = NULL;
+}
+
+static void adf_gen4_vfmig_reset_device(struct qat_mig_dev *mdev)
+{
+	mdev->setup_size = 0;
+	mdev->remote_setup_size = 0;
+}
+
+static int adf_gen4_vfmig_open_device(struct qat_mig_dev *mdev)
+{
+	struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+	struct adf_accel_vf_info *vf_info;
+	struct adf_gen4_vfmig *vfmig;
+
+	vf_info = &accel_dev->pf.vf_info[mdev->vf_id];
+
+	vfmig = kzalloc(sizeof(*vfmig), GFP_KERNEL);
+	if (!vfmig)
+		return -ENOMEM;
+
+	vfmig->mstate_mgr = adf_mstate_mgr_new(mdev->state, mdev->state_size);
+	if (!vfmig->mstate_mgr) {
+		kfree(vfmig);
+		return -ENOMEM;
+	}
+	vf_info->mig_priv = vfmig;
+	mdev->setup_size = 0;
+	mdev->remote_setup_size = 0;
+
+	return 0;
+}
+
+static void adf_gen4_vfmig_close_device(struct qat_mig_dev *mdev)
+{
+	struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+	struct adf_accel_vf_info *vf_info;
+	struct adf_gen4_vfmig *vfmig;
+
+	vf_info = &accel_dev->pf.vf_info[mdev->vf_id];
+	if (vf_info->mig_priv) {
+		vfmig = vf_info->mig_priv;
+		adf_mstate_mgr_destroy(vfmig->mstate_mgr);
+		kfree(vfmig);
+		vf_info->mig_priv = NULL;
+	}
+}
+
+/*
+ * Drain every bank that belongs to the VF and record it as stopped, then
+ * give each bank's coalesced interrupt timer a chance to quiesce.
+ *
+ * Returns 0 on success, or the error from adf_gen4_bank_drain_start() for
+ * the first bank that fails to drain; banks after it are not touched.
+ */
+static int adf_gen4_vfmig_suspend_device(struct qat_mig_dev *mdev)
+{
+	struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+	struct adf_accel_vf_info *vf_info;
+	struct adf_gen4_vfmig *vf_mig;
+	u32 vf_nr = mdev->vf_id;
+	int ret, i;
+
+	vf_info = &accel_dev->pf.vf_info[vf_nr];
+	vf_mig = vf_info->mig_priv;
+
+	/* Stop all inflight jobs */
+	for (i = 0; i < hw_data->num_banks_per_vf; i++) {
+		u32 pf_bank_nr = i + vf_nr * hw_data->num_banks_per_vf;
+
+		ret = adf_gen4_bank_drain_start(accel_dev, pf_bank_nr,
+						ADF_RPRESET_POLL_TIMEOUT_US);
+		if (ret) {
+			dev_err(&GET_DEV(accel_dev),
+				"Failed to drain bank %d for vf_nr %d\n", i,
+				vf_nr);
+			return ret;
+		}
+		vf_mig->bank_stopped[i] = true;
+
+		/*
+		 * The quiesce helper takes its limit in milliseconds, so the
+		 * microsecond constant is converted before being passed.
+		 */
+		adf_gen4_bank_quiesce_coal_timer(accel_dev, pf_bank_nr,
+						 ADF_COALESCED_POLL_TIMEOUT_US / USEC_PER_MSEC);
+	}
+
+	return 0;
+}
+
+static int adf_gen4_vfmig_resume_device(struct qat_mig_dev *mdev)
+{
+	struct adf_accel_dev *accel_dev = mdev->parent_accel_dev;
+	struct adf_hw_device_data *hw_data = accel_dev->hw_device;
+	struct adf_accel_vf_info *vf_info;
+	struct adf_gen4_vfmig *vf_mig;
+	u32 vf_nr = mdev->vf_id;
+	int i;
+
+	vf_info = &accel_dev->pf.vf_info[vf_nr];
+	vf_mig = vf_info->mig_priv;
+
+	for (i = 0; i < hw_data->num_banks_per_vf; i++) {
+		u32 pf_bank_nr = i + vf_nr * hw_data->num_banks_per_vf;
+
+		if (vf_mig->bank_stopped[i]) {
+			adf_gen4_bank_drain_finish(accel_dev, pf_bank_nr);
+			vf_mig->bank_stopped[i] = false;
+		}
+	}
+
+	return 0;
+}
+
+struct adf_vf_bank_info {
+	struct adf_accel_dev *accel_dev;
+	u32 vf_nr;
+	u32 bank_nr;
+};
+
+struct mig_user_sla {
+	enum adf_base_services srv;
+	u64 rp_mask;
+	u32 cir;
+	u32 pir;
+};
+
+static int adf_mstate_sla_check(struct adf_mstate_mgr *sub_mgr, u8 *src_buf,
+				u32 src_size, void *opaque)
+{
+	struct adf_mstate_vreginfo _sinfo = { src_buf, src_size };
+	struct adf_mstate_vreginfo *sinfo = &_sinfo, *dinfo = opaque;
+	u32 src_sla_cnt = sinfo->size / sizeof(struct mig_user_sla);
+	u32 dst_sla_cnt = dinfo->size / sizeof(struct mig_user_sla);
+	struct mig_user_sla *src_slas = sinfo->addr;
+	struct mig_user_sla *dst_slas = dinfo->addr;
+	int i, j;
+
+	for (i = 0; i < src_sla_cnt; i++) {
+		for (j = 0; j < dst_sla_cnt; j++) {
+			if (src_slas[i].srv != dst_slas[j].srv ||
+			    src_slas[i].rp_mask != dst_slas[j].rp_mask)
+				continue;
+
+			if (src_slas[i].cir > dst_slas[j].cir ||
+			    src_slas[i].pir > dst_slas[j].pir) {
+				pr_err("QAT: DST VF rate limiting mismatch.\n");
+				return -EINVAL;
+			}
+			break;
+		}
+
+		if (j == dst_sla_cnt) {
+			pr_err("QAT: SRC VF rate limiting mismatch - SRC srv %d and rp_mask 0x%llx.\n",
+			       src_slas[i].srv, src_slas[i].rp_mask);
+			return -EINVAL;
+		}
+ } + + return 0; +} + +static inline int adf_mstate_check_cap_size(u32 src_sz, u32 dst_sz, u32 max_sz) +{ + if (src_sz > max_sz || dst_sz > max_sz) + return -EINVAL; + else + return 0; +} + +static int adf_mstate_compatver_check(struct adf_mstate_mgr *sub_mgr, + u8 *src_buf, u32 src_sz, void *opaque) +{ + struct adf_mstate_vreginfo *info = opaque; + u8 compat = 0; + u8 *pcompat; + + if (src_sz != info->size) { + pr_debug("QAT: State mismatch (compat version size), current %u, expected %u\n", + src_sz, info->size); + return -EINVAL; + } + + memcpy(info->addr, src_buf, info->size); + pcompat = info->addr; + if (*pcompat == 0) { + pr_warn("QAT: Unable to determine the version of VF\n"); + return 0; + } + + compat = adf_vf_compat_checker(*pcompat); + if (compat == ADF_PF2VF_VF_INCOMPATIBLE) { + pr_debug("QAT: SRC VF driver (ver=%u) is incompatible with DST PF driver (ver=%u)\n", + *pcompat, ADF_PFVF_COMPAT_THIS_VERSION); + return -EINVAL; + } + + if (compat == ADF_PF2VF_VF_COMPAT_UNKNOWN) + pr_debug("QAT: SRC VF driver (ver=%u) is newer than DST PF driver (ver=%u)\n", + *pcompat, ADF_PFVF_COMPAT_THIS_VERSION); + + return 0; +} + +/* + * adf_mstate_capmask_compare() - compare QAT device capability mask + * @sinfo: Pointer to source capability info + * @dinfo: Pointer to target capability info + * + * This function compares the capability mask between source VF and target VF + * + * Returns: 0 if target capability mask is identical to source capability mask, + * 1 if target mask can represent all the capabilities represented by source mask, + * -1 if target mask can't represent all the capabilities represented by source + * mask. 
+ */ +static int adf_mstate_capmask_compare(struct adf_mstate_vreginfo *sinfo, + struct adf_mstate_vreginfo *dinfo) +{ + u64 src = 0, dst = 0; + + if (adf_mstate_check_cap_size(sinfo->size, dinfo->size, sizeof(u64))) { + pr_debug("QAT: Unexpected capability size %u %u %zu\n", + sinfo->size, dinfo->size, sizeof(u64)); + return -1; + } + + memcpy(&src, sinfo->addr, sinfo->size); + memcpy(&dst, dinfo->addr, dinfo->size); + + pr_debug("QAT: Check cap compatibility of cap %llu %llu\n", src, dst); + + if (src == dst) + return 0; + + if ((src | dst) == dst) + return 1; + + return -1; +} + +static int adf_mstate_capmask_superset(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo sinfo = { buf, size }; + + if (adf_mstate_capmask_compare(&sinfo, opa) >= 0) + return 0; + + return -EINVAL; +} + +static int adf_mstate_capmask_equal(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo sinfo = { buf, size }; + + if (adf_mstate_capmask_compare(&sinfo, opa) == 0) + return 0; + + return -EINVAL; +} + +static int adf_mstate_set_vreg(struct adf_mstate_mgr *sub_mgr, u8 *buf, + u32 size, void *opa) +{ + struct adf_mstate_vreginfo *info = opa; + + if (size != info->size) { + pr_debug("QAT: Unexpected cap size %u %u\n", size, info->size); + return -EINVAL; + } + memcpy(info->addr, buf, info->size); + + return 0; +} + +static u32 adf_gen4_vfmig_get_slas(struct adf_accel_dev *accel_dev, u32 vf_nr, + struct mig_user_sla *pmig_slas) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_rl *rl_data = accel_dev->rate_limiting; + struct rl_sla **sla_type_arr = NULL; + u64 rp_mask, rp_index; + u32 max_num_sla; + u32 sla_cnt = 0; + int i, j; + + if (!accel_dev->rate_limiting) + return 0; + + rp_index = vf_nr * hw_data->num_banks_per_vf; + max_num_sla = adf_rl_get_sla_arr_of_type(rl_data, RL_LEAF, &sla_type_arr); + + for (i = 0; i < max_num_sla; i++) { + if (!sla_type_arr[i]) + continue; + 
+ rp_mask = 0; + for (j = 0; j < sla_type_arr[i]->ring_pairs_cnt; j++) + rp_mask |= BIT(sla_type_arr[i]->ring_pairs_ids[j]); + + if (rp_mask & GENMASK_ULL(rp_index + 3, rp_index)) { + pmig_slas->rp_mask = rp_mask; + pmig_slas->cir = sla_type_arr[i]->cir; + pmig_slas->pir = sla_type_arr[i]->pir; + pmig_slas->srv = sla_type_arr[i]->srv; + pmig_slas++; + sla_cnt++; + } + } + + return sla_cnt; +} + +static int adf_gen4_vfmig_load_etr_regs(struct adf_mstate_mgr *sub_mgr, + u8 *state, u32 size, void *opa) +{ + struct adf_vf_bank_info *vf_bank_info = opa; + struct adf_accel_dev *accel_dev = vf_bank_info->accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + u32 pf_bank_nr; + int ret; + + pf_bank_nr = vf_bank_info->bank_nr + vf_bank_info->vf_nr * hw_data->num_banks_per_vf; + ret = hw_data->bank_state_restore(accel_dev, pf_bank_nr, + (struct bank_state *)state); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load regs for vf%d bank%d\n", + vf_bank_info->vf_nr, vf_bank_info->bank_nr); + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_load_etr_bank(struct adf_accel_dev *accel_dev, + u32 vf_nr, u32 bank_nr, + struct adf_mstate_mgr *mstate_mgr) +{ + struct adf_vf_bank_info vf_bank_info = {accel_dev, vf_nr, bank_nr}; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + char bank_ids[ADF_MSTATE_ID_LEN]; + + snprintf(bank_ids, sizeof(bank_ids), ADF_MSTATE_BANK_IDX_IDS "%x", bank_nr); + subsec = adf_mstate_sect_lookup(mstate_mgr, bank_ids, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to lookup sec %s for vf%d bank%d\n", + ADF_MSTATE_BANK_IDX_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, ADF_MSTATE_ETR_REGS_IDS, + adf_gen4_vfmig_load_etr_regs, + &vf_bank_info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + 
ADF_MSTATE_ETR_REGS_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + return 0; +} + +static int adf_gen4_vfmig_load_etr(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec; + int ret, i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_ETRB_IDS, NULL, + NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_ETRB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + ret = adf_gen4_vfmig_load_etr_bank(accel_dev, vf_nr, i, + &sub_sects_mgr); + if (ret) + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_load_misc(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + void __iomem *csr = adf_get_pmisc_base(accel_dev); + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + u64 ofs; + } misc_states[] = { + {ADF_MSTATE_VINTMSK_IDS, ADF_GEN4_VINTMSK_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_PF2VM_IDS, ADF_GEN4_VINTMSKPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_PF2VM_IDS, ADF_GEN4_PF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VM2PF_IDS, ADF_GEN4_VM2PF_OFFSET(vf_nr)}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_MISCB_IDS, NULL, + NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_MISCB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(misc_states); i++) { + struct adf_mstate_vreginfo 
info; + u32 regv; + + info.addr = &regv; + info.size = sizeof(regv); + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, + misc_states[i].id, + adf_mstate_set_vreg, + &info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to load sec %s\n", misc_states[i].id); + return -EINVAL; + } + ADF_CSR_WR(csr, misc_states[i].ofs, regv); + } + + return 0; +} + +static int adf_gen4_vfmig_load_generic(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct mig_user_sla dst_slas[RL_RP_CNT_PER_LEAF_MAX] = { }; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + u32 dst_sla_cnt; + struct { + char *id; + int (*action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, void *opa); + struct adf_mstate_vreginfo info; + } gen_states[] = { + {ADF_MSTATE_IOV_INIT_IDS, adf_mstate_set_vreg, + {&vf_info->init, sizeof(vf_info->init)}}, + {ADF_MSTATE_COMPAT_VER_IDS, adf_mstate_compatver_check, + {&vf_info->vf_compat_ver, sizeof(vf_info->vf_compat_ver)}}, + {ADF_MSTATE_SLA_IDS, adf_mstate_sla_check, {dst_slas, 0}}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_GEN_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_GEN_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(gen_states); i++) { + if (gen_states[i].info.addr == dst_slas) { + dst_sla_cnt = adf_gen4_vfmig_get_slas(accel_dev, vf_nr, dst_slas); + gen_states[i].info.size = dst_sla_cnt * sizeof(struct mig_user_sla); + } + + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, + gen_states[i].id, + gen_states[i].action, + &gen_states[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + gen_states[i].id); + return -EINVAL; + } + } + + return 
0; +} + +static int adf_gen4_vfmig_load_config(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + int (*action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, void *opa); + struct adf_mstate_vreginfo info; + } setups[] = { + {ADF_MSTATE_GEN_CAP_IDS, adf_mstate_capmask_superset, + {&hw_data->accel_capabilities_mask, sizeof(hw_data->accel_capabilities_mask)}}, + {ADF_MSTATE_GEN_SVCMAP_IDS, adf_mstate_capmask_equal, + {&hw_data->ring_to_svc_map, sizeof(hw_data->ring_to_svc_map)}}, + {ADF_MSTATE_GEN_EXTDC_IDS, adf_mstate_capmask_superset, + {&hw_data->extended_dc_capabilities, sizeof(hw_data->extended_dc_capabilities)}}, + }; + int i; + + subsec = adf_mstate_sect_lookup(mstate_mgr, ADF_MSTATE_CONFIG_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + ADF_MSTATE_CONFIG_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, subsec); + for (i = 0; i < ARRAY_SIZE(setups); i++) { + l2_subsec = adf_mstate_sect_lookup(&sub_sects_mgr, setups[i].id, + setups[i].action, &setups[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to load sec %s\n", + setups[i].id); + return -EINVAL; + } + } + + return 0; +} + +static int adf_gen4_vfmig_save_etr_regs(struct adf_mstate_mgr *subs, u8 *state, + u32 size, void *opa) +{ + struct adf_vf_bank_info *vf_bank_info = opa; + struct adf_accel_dev *accel_dev = vf_bank_info->accel_dev; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + u32 pf_bank_nr; + int ret; + + pf_bank_nr = vf_bank_info->bank_nr; + pf_bank_nr += vf_bank_info->vf_nr * hw_data->num_banks_per_vf; + + ret = 
hw_data->bank_state_save(accel_dev, pf_bank_nr, + (struct bank_state *)state); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save regs for vf%d bank%d\n", + vf_bank_info->vf_nr, vf_bank_info->bank_nr); + return ret; + } + + return sizeof(struct bank_state); +} + +static int adf_gen4_vfmig_save_etr_bank(struct adf_accel_dev *accel_dev, + u32 vf_nr, u32 bank_nr, + struct adf_mstate_mgr *mstate_mgr) +{ + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_vf_bank_info vf_bank_info; + struct adf_mstate_mgr sub_sects_mgr; + char bank_ids[ADF_MSTATE_ID_LEN]; + + snprintf(bank_ids, sizeof(bank_ids), ADF_MSTATE_BANK_IDX_IDS "%x", bank_nr); + + subsec = adf_mstate_sect_add(mstate_mgr, bank_ids, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + ADF_MSTATE_BANK_IDX_IDS, vf_nr, bank_nr); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + vf_bank_info.accel_dev = accel_dev; + vf_bank_info.vf_nr = vf_nr; + vf_bank_info.bank_nr = bank_nr; + l2_subsec = adf_mstate_sect_add(&sub_sects_mgr, ADF_MSTATE_ETR_REGS_IDS, + adf_gen4_vfmig_save_etr_regs, + &vf_bank_info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), + "Failed to add sec %s for vf%d bank%d\n", + ADF_MSTATE_ETR_REGS_IDS, vf_nr, bank_nr); + return -EINVAL; + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_etr(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec; + int ret, i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_ETRB_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + 
ADF_MSTATE_ETRB_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < hw_data->num_banks_per_vf; i++) { + ret = adf_gen4_vfmig_save_etr_bank(accel_dev, vf_nr, i, + &sub_sects_mgr); + if (ret) + return ret; + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_misc(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + void __iomem *csr = adf_get_pmisc_base(accel_dev); + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct adf_mstate_mgr sub_sects_mgr; + struct { + char *id; + u64 offset; + } misc_states[] = { + {ADF_MSTATE_VINTSRC_IDS, ADF_GEN4_VINTSOU_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_IDS, ADF_GEN4_VINTMSK_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTSRC_PF2VM_IDS, ADF_GEN4_VINTSOUPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VINTMSK_PF2VM_IDS, ADF_GEN4_VINTMSKPF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_PF2VM_IDS, ADF_GEN4_PF2VM_OFFSET(vf_nr)}, + {ADF_MSTATE_VM2PF_IDS, ADF_GEN4_VM2PF_OFFSET(vf_nr)}, + }; + ktime_t time_exp; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_MISCB_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_MISCB_IDS); + return -EINVAL; + } + + time_exp = ktime_add_us(ktime_get(), ADF_GEN4_PFVF_RSP_TIMEOUT_US); + while (!mutex_trylock(&vf_info->pfvf_mig_lock)) { + if (ktime_after(ktime_get(), time_exp)) { + dev_err(&GET_DEV(accel_dev), "Failed to get pfvf mig lock\n"); + return -ETIMEDOUT; + } + usleep_range(500, 1000); + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(misc_states); i++) { + struct adf_mstate_vreginfo info; + u32 regv; + + info.addr = &regv; + info.size = sizeof(regv); + regv = ADF_CSR_RD(csr, misc_states[i].offset); + + l2_subsec = 
adf_mstate_sect_add_vreg(&sub_sects_mgr, + misc_states[i].id, + &info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + misc_states[i].id); + mutex_unlock(&vf_info->pfvf_mig_lock); + return -EINVAL; + } + } + + mutex_unlock(&vf_info->pfvf_mig_lock); + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_generic(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct mig_user_sla src_slas[RL_RP_CNT_PER_LEAF_MAX] = { }; + u32 src_sla_cnt; + struct { + char *id; + struct adf_mstate_vreginfo info; + } gen_states[] = { + {ADF_MSTATE_IOV_INIT_IDS, + {&vf_info->init, sizeof(vf_info->init)}}, + {ADF_MSTATE_COMPAT_VER_IDS, + {&vf_info->vf_compat_ver, sizeof(vf_info->vf_compat_ver)}}, + {ADF_MSTATE_SLA_IDS, {src_slas, 0}}, + }; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_GEN_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_GEN_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(gen_states); i++) { + if (gen_states[i].info.addr == src_slas) { + src_sla_cnt = adf_gen4_vfmig_get_slas(accel_dev, vf_nr, src_slas); + gen_states[i].info.size = src_sla_cnt * sizeof(struct mig_user_sla); + } + + l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, + gen_states[i].id, + &gen_states[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + gen_states[i].id); + return -EINVAL; + } + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_config(struct adf_accel_dev *accel_dev, u32 vf_nr) +{ + struct 
adf_accel_vf_info *vf_info = &accel_dev->pf.vf_info[vf_nr]; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + struct adf_gen4_vfmig *vfmig = vf_info->mig_priv; + struct adf_mstate_mgr *mstate_mgr = vfmig->mstate_mgr; + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *subsec, *l2_subsec; + struct { + char *id; + struct adf_mstate_vreginfo info; + } setups[] = { + {ADF_MSTATE_GEN_CAP_IDS, + {&hw_data->accel_capabilities_mask, sizeof(hw_data->accel_capabilities_mask)}}, + {ADF_MSTATE_GEN_SVCMAP_IDS, + {&hw_data->ring_to_svc_map, sizeof(hw_data->ring_to_svc_map)}}, + {ADF_MSTATE_GEN_EXTDC_IDS, + {&hw_data->extended_dc_capabilities, sizeof(hw_data->extended_dc_capabilities)}}, + }; + int i; + + subsec = adf_mstate_sect_add(mstate_mgr, ADF_MSTATE_CONFIG_IDS, NULL, NULL); + if (!subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + ADF_MSTATE_CONFIG_IDS); + return -EINVAL; + } + + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mstate_mgr); + for (i = 0; i < ARRAY_SIZE(setups); i++) { + l2_subsec = adf_mstate_sect_add_vreg(&sub_sects_mgr, setups[i].id, + &setups[i].info); + if (!l2_subsec) { + dev_err(&GET_DEV(accel_dev), "Failed to add sec %s\n", + setups[i].id); + return -EINVAL; + } + } + adf_mstate_sect_update(mstate_mgr, &sub_sects_mgr, subsec); + + return 0; +} + +static int adf_gen4_vfmig_save_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + ret = adf_gen4_vfmig_save_setup(mdev); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save setup for vf_nr %d\n", vf_nr); + return ret; + } + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state + mdev->setup_size, + mdev->state_size - mdev->setup_size); + if (!adf_mstate_preamble_add(vfmig->mstate_mgr)) + return -EINVAL; + + ret = 
adf_gen4_vfmig_save_generic(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save generic state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_save_misc(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save misc bar state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_save_etr(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to save etr bar state for vf_nr %d\n", vf_nr); + return ret; + } + + adf_mstate_preamble_update(vfmig->mstate_mgr); + + return 0; +} + +static int adf_gen4_vfmig_load_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + ret = adf_gen4_vfmig_load_setup(mdev, mdev->state_size); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Failed to load setup for vf_nr %d\n", + vf_nr); + return ret; + } + + ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr, + mdev->state + mdev->remote_setup_size, + mdev->state_size - mdev->remote_setup_size, + NULL, NULL); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Invalid state for vf_nr %d\n", + vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_generic(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load general state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_misc(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load misc bar state for vf_nr %d\n", vf_nr); + return ret; + } + + ret = adf_gen4_vfmig_load_etr(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load etr bar state for vf_nr %d\n", vf_nr); + return ret; + } + + return 0; +} + +static int adf_gen4_vfmig_save_setup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + 
struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + if (mdev->setup_size) + return 0; + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state, mdev->state_size); + if (!adf_mstate_preamble_add(vfmig->mstate_mgr)) + return -EINVAL; + + ret = adf_gen4_vfmig_save_config(accel_dev, mdev->vf_id); + if (ret) + return ret; + + adf_mstate_preamble_update(vfmig->mstate_mgr); + mdev->setup_size = adf_mstate_state_size(vfmig->mstate_mgr); + + return 0; +} + +static int adf_gen4_vfmig_load_setup(struct qat_mig_dev *mdev, int len) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + struct adf_accel_vf_info *vf_info; + struct adf_gen4_vfmig *vfmig; + u32 vf_nr = mdev->vf_id; + u32 setup_size; + int ret; + + vf_info = &accel_dev->pf.vf_info[vf_nr]; + vfmig = vf_info->mig_priv; + + if (mdev->remote_setup_size) + return 0; + + if (len < sizeof(struct adf_mstate_preh)) + return -EAGAIN; + + adf_mstate_mgr_init(vfmig->mstate_mgr, mdev->state, mdev->state_size); + setup_size = adf_mstate_state_size_from_remote(vfmig->mstate_mgr); + if (setup_size > mdev->state_size) + return -EINVAL; + + if (len < setup_size) + return -EAGAIN; + + ret = adf_mstate_mgr_init_from_remote(vfmig->mstate_mgr, mdev->state, + setup_size, NULL, NULL); + if (ret) { + dev_err(&GET_DEV(accel_dev), "Invalid setup for vf_nr %d\n", + vf_nr); + return ret; + } + + mdev->remote_setup_size = setup_size; + + ret = adf_gen4_vfmig_load_config(accel_dev, vf_nr); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Failed to load config for vf_nr %d\n", vf_nr); + return ret; + } + + return 0; +} + +void adf_gen4_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops) +{ + vfmig_ops->init = adf_gen4_vfmig_init_device; + vfmig_ops->cleanup = adf_gen4_vfmig_cleanup_device; + vfmig_ops->reset = adf_gen4_vfmig_reset_device; + vfmig_ops->open = adf_gen4_vfmig_open_device; + vfmig_ops->close = 
adf_gen4_vfmig_close_device; + vfmig_ops->suspend = adf_gen4_vfmig_suspend_device; + vfmig_ops->resume = adf_gen4_vfmig_resume_device; + vfmig_ops->save_state = adf_gen4_vfmig_save_state; + vfmig_ops->load_state = adf_gen4_vfmig_load_state; + vfmig_ops->load_setup = adf_gen4_vfmig_load_setup; + vfmig_ops->save_setup = adf_gen4_vfmig_save_setup; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_vf_mig_ops); diff --git a/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h new file mode 100644 index 0000000000..72216d078e --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_gen4_vf_mig.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ +#ifndef ADF_GEN4_VF_MIG_H_ +#define ADF_GEN4_VF_MIG_H_ + +#include "adf_accel_devices.h" + +void adf_gen4_init_vf_mig_ops(struct qat_migdev_ops *vfmig_ops); + +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c index 13f48d2f6d..b19aa1ef8e 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c +++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.c @@ -23,12 +23,6 @@ #define ADF_HB_EMPTY_SIG 0xA5A5A5A5 -/* Heartbeat counter pair */ -struct hb_cnt_pair { - __u16 resp_heartbeat_cnt; - __u16 req_heartbeat_cnt; -}; - static int adf_hb_check_polling_freq(struct adf_accel_dev *accel_dev) { u64 curr_time = adf_clock_get_current_time(); @@ -211,6 +205,19 @@ static int adf_hb_get_status(struct adf_accel_dev *accel_dev) return ret; } +static void adf_heartbeat_reset(struct adf_accel_dev *accel_dev) +{ + u64 curr_time = adf_clock_get_current_time(); + u64 time_since_reset = curr_time - accel_dev->heartbeat->last_hb_reset_time; + + if (time_since_reset < ADF_CFG_HB_RESET_MS) + return; + + accel_dev->heartbeat->last_hb_reset_time = curr_time; + if (adf_notify_fatal_error(accel_dev)) + dev_err(&GET_DEV(accel_dev), "Failed to notify fatal error\n"); +} + void 
adf_heartbeat_status(struct adf_accel_dev *accel_dev, enum adf_device_heartbeat_status *hb_status) { @@ -235,6 +242,7 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev, "Heartbeat ERROR: QAT is not responding.\n"); *hb_status = HB_DEV_UNRESPONSIVE; hb->hb_failed_counter++; + adf_heartbeat_reset(accel_dev); return; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h index b22e3cb297..16fdfb48b1 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h +++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat.h @@ -13,17 +13,26 @@ struct dentry; #define ADF_CFG_HB_TIMER_DEFAULT_MS 500 #define ADF_CFG_HB_COUNT_THRESHOLD 3 +#define ADF_CFG_HB_RESET_MS 5000 + enum adf_device_heartbeat_status { HB_DEV_UNRESPONSIVE = 0, HB_DEV_ALIVE, HB_DEV_UNSUPPORTED, }; +/* Heartbeat counter pair */ +struct hb_cnt_pair { + __u16 resp_heartbeat_cnt; + __u16 req_heartbeat_cnt; +}; + struct adf_heartbeat { unsigned int hb_sent_counter; unsigned int hb_failed_counter; unsigned int hb_timer; u64 last_hb_check_time; + u64 last_hb_reset_time; bool ctrs_cnt_checked; struct hb_dma_addr { dma_addr_t phy_addr; @@ -35,6 +44,9 @@ struct adf_heartbeat { struct dentry *cfg; struct dentry *sent; struct dentry *failed; +#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION + struct dentry *inject_error; +#endif } dbgfs; }; @@ -51,6 +63,15 @@ void adf_heartbeat_status(struct adf_accel_dev *accel_dev, enum adf_device_heartbeat_status *hb_status); void adf_heartbeat_check_ctrs(struct adf_accel_dev *accel_dev); +#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION +int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev); +#else +static inline int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev) +{ + return -EPERM; +} +#endif + #else static inline int adf_heartbeat_init(struct adf_accel_dev *accel_dev) { diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c 
b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c index 2661af6a2e..cccdff24b4 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_dbgfs.c @@ -155,6 +155,44 @@ static const struct file_operations adf_hb_cfg_fops = { .write = adf_hb_cfg_write, }; +static ssize_t adf_hb_error_inject_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct adf_accel_dev *accel_dev = file->private_data; + char buf[3]; + int ret; + + /* last byte left as string termination */ + if (*ppos != 0 || count != 2) + return -EINVAL; + + if (copy_from_user(buf, user_buf, count)) + return -EFAULT; + buf[count] = '\0'; + + if (buf[0] != '1') + return -EINVAL; + + ret = adf_heartbeat_inject_error(accel_dev); + if (ret) { + dev_err(&GET_DEV(accel_dev), + "Heartbeat error injection failed with status %d\n", + ret); + return ret; + } + + dev_info(&GET_DEV(accel_dev), "Heartbeat error injection enabled\n"); + + return count; +} + +static const struct file_operations adf_hb_error_inject_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .write = adf_hb_error_inject_write, +}; + void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev) { struct adf_heartbeat *hb = accel_dev->heartbeat; @@ -171,6 +209,17 @@ void adf_heartbeat_dbgfs_add(struct adf_accel_dev *accel_dev) &hb->hb_failed_counter, &adf_hb_stats_fops); hb->dbgfs.cfg = debugfs_create_file("config", 0600, hb->dbgfs.base_dir, accel_dev, &adf_hb_cfg_fops); + + if (IS_ENABLED(CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION)) { + struct dentry *inject_error __maybe_unused; + + inject_error = debugfs_create_file("inject_error", 0200, + hb->dbgfs.base_dir, accel_dev, + &adf_hb_error_inject_fops); +#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION + hb->dbgfs.inject_error = inject_error; +#endif + } } EXPORT_SYMBOL_GPL(adf_heartbeat_dbgfs_add); @@ -189,6 +238,10 @@ void adf_heartbeat_dbgfs_rm(struct adf_accel_dev *accel_dev) 
hb->dbgfs.failed = NULL; debugfs_remove(hb->dbgfs.cfg); hb->dbgfs.cfg = NULL; +#ifdef CONFIG_CRYPTO_DEV_QAT_ERROR_INJECTION + debugfs_remove(hb->dbgfs.inject_error); + hb->dbgfs.inject_error = NULL; +#endif debugfs_remove(hb->dbgfs.base_dir); hb->dbgfs.base_dir = NULL; } diff --git a/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c new file mode 100644 index 0000000000..a3b474bdef --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_heartbeat_inject.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation */ +#include <linux/random.h> + +#include "adf_admin.h" +#include "adf_common_drv.h" +#include "adf_heartbeat.h" + +#define MAX_HB_TICKS 0xFFFFFFFF + +static int adf_hb_set_timer_to_max(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + + accel_dev->heartbeat->hb_timer = 0; + + if (hw_data->stop_timer) + hw_data->stop_timer(accel_dev); + + return adf_send_admin_hb_timer(accel_dev, MAX_HB_TICKS); +} + +static void adf_set_hb_counters_fail(struct adf_accel_dev *accel_dev, u32 ae, + u32 thr) +{ + struct hb_cnt_pair *stats = accel_dev->heartbeat->dma.virt_addr; + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + const size_t max_aes = hw_device->get_num_aes(hw_device); + const size_t hb_ctrs = hw_device->num_hb_ctrs; + size_t thr_id = ae * hb_ctrs + thr; + u16 num_rsp = stats[thr_id].resp_heartbeat_cnt; + + /* + * Inject live.req != live.rsp and live.rsp == last.rsp + * to trigger the heartbeat error detection + */ + stats[thr_id].req_heartbeat_cnt++; + stats += (max_aes * hb_ctrs); + stats[thr_id].resp_heartbeat_cnt = num_rsp; +} + +int adf_heartbeat_inject_error(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + const size_t max_aes = hw_device->get_num_aes(hw_device); + const size_t hb_ctrs = hw_device->num_hb_ctrs; + u32 rand, rand_ae, 
rand_thr; + unsigned long ae_mask; + int ret; + + ae_mask = hw_device->ae_mask; + + do { + /* Ensure we have a valid ae */ + get_random_bytes(&rand, sizeof(rand)); + rand_ae = rand % max_aes; + } while (!test_bit(rand_ae, &ae_mask)); + + get_random_bytes(&rand, sizeof(rand)); + rand_thr = rand % hb_ctrs; + + /* Increase the heartbeat timer to prevent FW updating HB counters */ + ret = adf_hb_set_timer_to_max(accel_dev); + if (ret) + return ret; + + /* Configure worker threads to stop processing any packet */ + ret = adf_disable_arb_thd(accel_dev, rand_ae, rand_thr); + if (ret) + return ret; + + /* Change HB counters memory to simulate a hang */ + adf_set_hb_counters_fail(accel_dev, rand_ae, rand_thr); + + return 0; +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c b/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c index da69566992..65bd26b25a 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c +++ b/drivers/crypto/intel/qat/qat_common/adf_hw_arbiter.c @@ -103,3 +103,28 @@ void adf_exit_arb(struct adf_accel_dev *accel_dev) csr_ops->write_csr_ring_srv_arb_en(csr, i, 0); } EXPORT_SYMBOL_GPL(adf_exit_arb); + +int adf_disable_arb_thd(struct adf_accel_dev *accel_dev, u32 ae, u32 thr) +{ + void __iomem *csr = accel_dev->transport->banks[0].csr_addr; + struct adf_hw_device_data *hw_data = accel_dev->hw_device; + const u32 *thd_2_arb_cfg; + struct arb_info info; + u32 ae_thr_map; + + if (ADF_AE_STRAND0_THREAD == thr || ADF_AE_STRAND1_THREAD == thr) + thr = ADF_AE_ADMIN_THREAD; + + hw_data->get_arb_info(&info); + thd_2_arb_cfg = hw_data->get_arb_mapping(accel_dev); + if (!thd_2_arb_cfg) + return -EFAULT; + + /* Disable scheduling for this particular AE and thread */ + ae_thr_map = *(thd_2_arb_cfg + ae); + ae_thr_map &= ~(GENMASK(3, 0) << (thr * BIT(2))); + + WRITE_CSR_ARB_WT2SAM(csr, info.arb_offset, info.wt2sam_offset, ae, + ae_thr_map); + return 0; +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_init.c 
b/drivers/crypto/intel/qat/qat_common/adf_init.c index f43ae91115..74f0818c07 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_init.c +++ b/drivers/crypto/intel/qat/qat_common/adf_init.c @@ -433,6 +433,18 @@ int adf_dev_restarted_notify(struct adf_accel_dev *accel_dev) return 0; } +void adf_error_notifier(struct adf_accel_dev *accel_dev) +{ + struct service_hndl *service; + + list_for_each_entry(service, &service_table, list) { + if (service->event_hld(accel_dev, ADF_EVENT_FATAL_ERROR)) + dev_err(&GET_DEV(accel_dev), + "Failed to send error event to %s.\n", + service->name); + } +} + static int adf_dev_shutdown_cache_cfg(struct adf_accel_dev *accel_dev) { char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; diff --git a/drivers/crypto/intel/qat/qat_common/adf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_isr.c index 3557a0d6de..cae1aee547 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_isr.c +++ b/drivers/crypto/intel/qat/qat_common/adf_isr.c @@ -139,8 +139,13 @@ static bool adf_handle_ras_int(struct adf_accel_dev *accel_dev) if (ras_ops->handle_interrupt && ras_ops->handle_interrupt(accel_dev, &reset_required)) { - if (reset_required) + if (reset_required) { dev_err(&GET_DEV(accel_dev), "Fatal error, reset required\n"); + if (adf_notify_fatal_error(accel_dev)) + dev_err(&GET_DEV(accel_dev), + "Failed to notify fatal error\n"); + } + return true; } @@ -272,7 +277,7 @@ static int adf_isr_alloc_msix_vectors_data(struct adf_accel_dev *accel_dev) if (!accel_dev->pf.vf_info) msix_num_entries += hw_data->num_banks; - irqs = kzalloc_node(msix_num_entries * sizeof(*irqs), + irqs = kcalloc_node(msix_num_entries, sizeof(*irqs), GFP_KERNEL, dev_to_node(&GET_DEV(accel_dev))); if (!irqs) return -ENOMEM; @@ -375,8 +380,6 @@ EXPORT_SYMBOL_GPL(adf_isr_resource_alloc); /** * adf_init_misc_wq() - Init misc workqueue * - * Function init workqueue 'qat_misc_wq' for general purpose. - * * Return: 0 on success, error code otherwise. 
*/ int __init adf_init_misc_wq(void) diff --git a/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c new file mode 100644 index 0000000000..41cc763a74 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ + +#include <linux/slab.h> +#include <linux/types.h> +#include "adf_mstate_mgr.h" + +#define ADF_MSTATE_MAGIC 0xADF5CAEA +#define ADF_MSTATE_VERSION 0x1 + +struct adf_mstate_sect_h { + u8 id[ADF_MSTATE_ID_LEN]; + u32 size; + u32 sub_sects; + u8 state[]; +}; + +u32 adf_mstate_state_size(struct adf_mstate_mgr *mgr) +{ + return mgr->state - mgr->buf; +} + +static inline u32 adf_mstate_avail_room(struct adf_mstate_mgr *mgr) +{ + return mgr->buf + mgr->size - mgr->state; +} + +void adf_mstate_mgr_init(struct adf_mstate_mgr *mgr, u8 *buf, u32 size) +{ + mgr->buf = buf; + mgr->state = buf; + mgr->size = size; + mgr->n_sects = 0; +}; + +struct adf_mstate_mgr *adf_mstate_mgr_new(u8 *buf, u32 size) +{ + struct adf_mstate_mgr *mgr; + + mgr = kzalloc(sizeof(*mgr), GFP_KERNEL); + if (!mgr) + return NULL; + + adf_mstate_mgr_init(mgr, buf, size); + + return mgr; +} + +void adf_mstate_mgr_destroy(struct adf_mstate_mgr *mgr) +{ + kfree(mgr); +} + +void adf_mstate_mgr_init_from_parent(struct adf_mstate_mgr *mgr, + struct adf_mstate_mgr *p_mgr) +{ + adf_mstate_mgr_init(mgr, p_mgr->state, + p_mgr->size - adf_mstate_state_size(p_mgr)); +} + +void adf_mstate_mgr_init_from_psect(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *p_sect) +{ + adf_mstate_mgr_init(mgr, p_sect->state, p_sect->size); + mgr->n_sects = p_sect->sub_sects; +} + +static void adf_mstate_preamble_init(struct adf_mstate_preh *preamble) +{ + preamble->magic = ADF_MSTATE_MAGIC; + preamble->version = ADF_MSTATE_VERSION; + preamble->preh_len = sizeof(*preamble); + preamble->size = 0; + preamble->n_sects = 0; +} + +/* default preambles 
checker */ +static int adf_mstate_preamble_def_checker(struct adf_mstate_preh *preamble, + void *opaque) +{ + struct adf_mstate_mgr *mgr = opaque; + + if (preamble->magic != ADF_MSTATE_MAGIC || + preamble->version > ADF_MSTATE_VERSION || + preamble->preh_len > mgr->size) { + pr_debug("QAT: LM - Invalid state (magic=%#x, version=%#x, hlen=%u), state_size=%u\n", + preamble->magic, preamble->version, preamble->preh_len, + mgr->size); + return -EINVAL; + } + + return 0; +} + +struct adf_mstate_preh *adf_mstate_preamble_add(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *pre = (struct adf_mstate_preh *)mgr->buf; + + if (adf_mstate_avail_room(mgr) < sizeof(*pre)) { + pr_err("QAT: LM - Not enough space for preamble\n"); + return NULL; + } + + adf_mstate_preamble_init(pre); + mgr->state += pre->preh_len; + + return pre; +} + +int adf_mstate_preamble_update(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *preamble = (struct adf_mstate_preh *)mgr->buf; + + preamble->size = adf_mstate_state_size(mgr) - preamble->preh_len; + preamble->n_sects = mgr->n_sects; + + return 0; +} + +static void adf_mstate_dump_sect(struct adf_mstate_sect_h *sect, + const char *prefix) +{ + pr_debug("QAT: LM - %s QAT state section %s\n", prefix, sect->id); + print_hex_dump_debug("h-", DUMP_PREFIX_OFFSET, 16, 2, sect, + sizeof(*sect), true); + print_hex_dump_debug("s-", DUMP_PREFIX_OFFSET, 16, 2, sect->state, + sect->size, true); +} + +static inline void __adf_mstate_sect_update(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *sect, + u32 size, + u32 n_subsects) +{ + sect->size += size; + sect->sub_sects += n_subsects; + mgr->n_sects++; + mgr->state += sect->size; + + adf_mstate_dump_sect(sect, "Add"); +} + +void adf_mstate_sect_update(struct adf_mstate_mgr *p_mgr, + struct adf_mstate_mgr *curr_mgr, + struct adf_mstate_sect_h *sect) +{ + __adf_mstate_sect_update(p_mgr, sect, adf_mstate_state_size(curr_mgr), + curr_mgr->n_sects); +} + +static struct adf_mstate_sect_h 
*adf_mstate_sect_add_header(struct adf_mstate_mgr *mgr, + const char *id) +{ + struct adf_mstate_sect_h *sect = (struct adf_mstate_sect_h *)(mgr->state); + + if (adf_mstate_avail_room(mgr) < sizeof(*sect)) { + pr_debug("QAT: LM - Not enough space for header of QAT state sect %s\n", id); + return NULL; + } + + strscpy(sect->id, id, sizeof(sect->id)); + sect->size = 0; + sect->sub_sects = 0; + mgr->state += sizeof(*sect); + + return sect; +} + +struct adf_mstate_sect_h *adf_mstate_sect_add_vreg(struct adf_mstate_mgr *mgr, + const char *id, + struct adf_mstate_vreginfo *info) +{ + struct adf_mstate_sect_h *sect; + + sect = adf_mstate_sect_add_header(mgr, id); + if (!sect) + return NULL; + + if (adf_mstate_avail_room(mgr) < info->size) { + pr_debug("QAT: LM - Not enough space for QAT state sect %s, requires %u\n", + id, info->size); + return NULL; + } + + memcpy(sect->state, info->addr, info->size); + __adf_mstate_sect_update(mgr, sect, info->size, 0); + + return sect; +} + +struct adf_mstate_sect_h *adf_mstate_sect_add(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_populate populate, + void *opaque) +{ + struct adf_mstate_mgr sub_sects_mgr; + struct adf_mstate_sect_h *sect; + int avail_room, size; + + sect = adf_mstate_sect_add_header(mgr, id); + if (!sect) + return NULL; + + if (!populate) + return sect; + + avail_room = adf_mstate_avail_room(mgr); + adf_mstate_mgr_init_from_parent(&sub_sects_mgr, mgr); + + size = (*populate)(&sub_sects_mgr, sect->state, avail_room, opaque); + if (size < 0) + return NULL; + + size += adf_mstate_state_size(&sub_sects_mgr); + if (avail_room < size) { + pr_debug("QAT: LM - Not enough space for QAT state sect %s, requires %u\n", + id, size); + return NULL; + } + __adf_mstate_sect_update(mgr, sect, size, sub_sects_mgr.n_sects); + + return sect; +} + +static int adf_mstate_sect_validate(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_sect_h *start = (struct adf_mstate_sect_h *)mgr->state; + struct adf_mstate_sect_h *sect = 
start; + u64 end; + int i; + + end = (uintptr_t)mgr->buf + mgr->size; + for (i = 0; i < mgr->n_sects; i++) { + uintptr_t s_start = (uintptr_t)sect->state; + uintptr_t s_end = s_start + sect->size; + + if (s_end < s_start || s_end > end) { + pr_debug("QAT: LM - Corrupted state section (index=%u, size=%u) in state_mgr (size=%u, secs=%u)\n", + i, sect->size, mgr->size, mgr->n_sects); + return -EINVAL; + } + sect = (struct adf_mstate_sect_h *)s_end; + } + + pr_debug("QAT: LM - Scanned section (last child=%s, size=%lu) in state_mgr (size=%u, secs=%u)\n", + start->id, sizeof(struct adf_mstate_sect_h) * (ulong)(sect - start), + mgr->size, mgr->n_sects); + + return 0; +} + +u32 adf_mstate_state_size_from_remote(struct adf_mstate_mgr *mgr) +{ + struct adf_mstate_preh *preh = (struct adf_mstate_preh *)mgr->buf; + + return preh->preh_len + preh->size; +} + +int adf_mstate_mgr_init_from_remote(struct adf_mstate_mgr *mgr, u8 *buf, u32 size, + adf_mstate_preamble_checker pre_checker, + void *opaque) +{ + struct adf_mstate_preh *pre; + int ret; + + adf_mstate_mgr_init(mgr, buf, size); + pre = (struct adf_mstate_preh *)(mgr->buf); + + pr_debug("QAT: LM - Dump state preambles\n"); + print_hex_dump_debug("", DUMP_PREFIX_OFFSET, 16, 2, pre, pre->preh_len, 0); + + if (pre_checker) + ret = (*pre_checker)(pre, opaque); + else + ret = adf_mstate_preamble_def_checker(pre, mgr); + if (ret) + return ret; + + mgr->state = mgr->buf + pre->preh_len; + mgr->n_sects = pre->n_sects; + + return adf_mstate_sect_validate(mgr); +} + +struct adf_mstate_sect_h *adf_mstate_sect_lookup(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_action action, + void *opaque) +{ + struct adf_mstate_sect_h *sect = (struct adf_mstate_sect_h *)mgr->state; + struct adf_mstate_mgr sub_sects_mgr; + int i, ret; + + for (i = 0; i < mgr->n_sects; i++) { + if (!strncmp(sect->id, id, sizeof(sect->id))) + goto found; + + sect = (struct adf_mstate_sect_h *)(sect->state + sect->size); + } + + return NULL; + +found: + 
adf_mstate_dump_sect(sect, "Found"); + + adf_mstate_mgr_init_from_psect(&sub_sects_mgr, sect); + if (sect->sub_sects && adf_mstate_sect_validate(&sub_sects_mgr)) + return NULL; + + if (!action) + return sect; + + ret = (*action)(&sub_sects_mgr, sect->state, sect->size, opaque); + if (ret) + return NULL; + + return sect; +} diff --git a/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h new file mode 100644 index 0000000000..81d263a596 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/adf_mstate_mgr.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2024 Intel Corporation */ + +#ifndef ADF_MSTATE_MGR_H +#define ADF_MSTATE_MGR_H + +#define ADF_MSTATE_ID_LEN 8 + +#define ADF_MSTATE_ETRB_IDS "ETRBAR" +#define ADF_MSTATE_MISCB_IDS "MISCBAR" +#define ADF_MSTATE_EXTB_IDS "EXTBAR" +#define ADF_MSTATE_GEN_IDS "GENER" +#define ADF_MSTATE_CONFIG_IDS "CONFIG" +#define ADF_MSTATE_SECTION_NUM 5 + +#define ADF_MSTATE_BANK_IDX_IDS "bnk" + +#define ADF_MSTATE_ETR_REGS_IDS "mregs" +#define ADF_MSTATE_VINTSRC_IDS "visrc" +#define ADF_MSTATE_VINTMSK_IDS "vimsk" +#define ADF_MSTATE_SLA_IDS "sla" +#define ADF_MSTATE_IOV_INIT_IDS "iovinit" +#define ADF_MSTATE_COMPAT_VER_IDS "compver" +#define ADF_MSTATE_GEN_CAP_IDS "gencap" +#define ADF_MSTATE_GEN_SVCMAP_IDS "svcmap" +#define ADF_MSTATE_GEN_EXTDC_IDS "extdc" +#define ADF_MSTATE_VINTSRC_PF2VM_IDS "vispv" +#define ADF_MSTATE_VINTMSK_PF2VM_IDS "vimpv" +#define ADF_MSTATE_VM2PF_IDS "vm2pf" +#define ADF_MSTATE_PF2VM_IDS "pf2vm" + +struct adf_mstate_mgr { + u8 *buf; + u8 *state; + u32 size; + u32 n_sects; +}; + +struct adf_mstate_preh { + u32 magic; + u32 version; + u16 preh_len; + u16 n_sects; + u32 size; +}; + +struct adf_mstate_vreginfo { + void *addr; + u32 size; +}; + +struct adf_mstate_sect_h; + +typedef int (*adf_mstate_preamble_checker)(struct adf_mstate_preh *preamble, void *opa); +typedef int (*adf_mstate_populate)(struct adf_mstate_mgr 
*sub_mgr, u8 *buf, + u32 size, void *opa); +typedef int (*adf_mstate_action)(struct adf_mstate_mgr *sub_mgr, u8 *buf, u32 size, + void *opa); + +struct adf_mstate_mgr *adf_mstate_mgr_new(u8 *buf, u32 size); +void adf_mstate_mgr_destroy(struct adf_mstate_mgr *mgr); +void adf_mstate_mgr_init(struct adf_mstate_mgr *mgr, u8 *buf, u32 size); +void adf_mstate_mgr_init_from_parent(struct adf_mstate_mgr *mgr, + struct adf_mstate_mgr *p_mgr); +void adf_mstate_mgr_init_from_psect(struct adf_mstate_mgr *mgr, + struct adf_mstate_sect_h *p_sect); +int adf_mstate_mgr_init_from_remote(struct adf_mstate_mgr *mgr, + u8 *buf, u32 size, + adf_mstate_preamble_checker checker, + void *opaque); +struct adf_mstate_preh *adf_mstate_preamble_add(struct adf_mstate_mgr *mgr); +int adf_mstate_preamble_update(struct adf_mstate_mgr *mgr); +u32 adf_mstate_state_size(struct adf_mstate_mgr *mgr); +u32 adf_mstate_state_size_from_remote(struct adf_mstate_mgr *mgr); +void adf_mstate_sect_update(struct adf_mstate_mgr *p_mgr, + struct adf_mstate_mgr *curr_mgr, + struct adf_mstate_sect_h *sect); +struct adf_mstate_sect_h *adf_mstate_sect_add_vreg(struct adf_mstate_mgr *mgr, + const char *id, + struct adf_mstate_vreginfo *info); +struct adf_mstate_sect_h *adf_mstate_sect_add(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_populate populate, + void *opaque); +struct adf_mstate_sect_h *adf_mstate_sect_lookup(struct adf_mstate_mgr *mgr, + const char *id, + adf_mstate_action action, + void *opaque); +#endif diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h index 204a424389..d1b3ef9cad 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_msg.h @@ -99,6 +99,8 @@ enum pf2vf_msgtype { ADF_PF2VF_MSGTYPE_RESTARTING = 0x01, ADF_PF2VF_MSGTYPE_VERSION_RESP = 0x02, ADF_PF2VF_MSGTYPE_BLKMSG_RESP = 0x03, + ADF_PF2VF_MSGTYPE_FATAL_ERROR = 0x04, + ADF_PF2VF_MSGTYPE_RESTARTED = 0x05, 
/* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */ ADF_PF2VF_MSGTYPE_RP_RESET_RESP = 0x10, }; @@ -112,6 +114,7 @@ enum vf2pf_msgtype { ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ = 0x07, ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ = 0x08, ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ = 0x09, + ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE = 0x0a, /* Values from 0x10 are Gen4 specific, message type is only 4 bits in Gen2 devices. */ ADF_VF2PF_MSGTYPE_RP_RESET = 0x10, }; @@ -124,8 +127,10 @@ enum pfvf_compatibility_version { ADF_PFVF_COMPAT_FAST_ACK = 0x03, /* Ring to service mapping support for non-standard mappings */ ADF_PFVF_COMPAT_RING_TO_SVC_MAP = 0x04, + /* Fallback compat */ + ADF_PFVF_COMPAT_FALLBACK = 0x05, /* Reference to the latest version */ - ADF_PFVF_COMPAT_THIS_VERSION = 0x04, + ADF_PFVF_COMPAT_THIS_VERSION = 0x05, }; /* PF->VF Version Response */ diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c index 14c069f0d7..0e31f4b418 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.c @@ -1,21 +1,83 @@ // SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only) /* Copyright(c) 2015 - 2021 Intel Corporation */ +#include <linux/delay.h> #include <linux/pci.h> #include "adf_accel_devices.h" #include "adf_pfvf_msg.h" #include "adf_pfvf_pf_msg.h" #include "adf_pfvf_pf_proto.h" +#define ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY 100 +#define ADF_VF_SHUTDOWN_RETRY 100 + void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev) { struct adf_accel_vf_info *vf; struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTING }; int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev)); + dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarting\n"); for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) { - if (vf->init && adf_send_pf2vf_msg(accel_dev, i, msg)) + vf->restarting = false; + if (!vf->init) + continue; + if 
(adf_send_pf2vf_msg(accel_dev, i, msg)) dev_err(&GET_DEV(accel_dev), "Failed to send restarting msg to VF%d\n", i); + else if (vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK) + vf->restarting = true; + } +} + +void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev) +{ + int num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev)); + int i, retries = ADF_VF_SHUTDOWN_RETRY; + struct adf_accel_vf_info *vf; + bool vf_running; + + dev_dbg(&GET_DEV(accel_dev), "pf2vf wait for restarting complete\n"); + do { + vf_running = false; + for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) + if (vf->restarting) + vf_running = true; + if (!vf_running) + break; + msleep(ADF_PF_WAIT_RESTARTING_COMPLETE_DELAY); + } while (--retries); + + if (vf_running) + dev_warn(&GET_DEV(accel_dev), "Some VFs are still running\n"); +} + +void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev) +{ + struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_RESTARTED }; + int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev)); + struct adf_accel_vf_info *vf; + + dev_dbg(&GET_DEV(accel_dev), "pf2vf notify restarted\n"); + for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) { + if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK && + adf_send_pf2vf_msg(accel_dev, i, msg)) + dev_err(&GET_DEV(accel_dev), + "Failed to send restarted msg to VF%d\n", i); + } +} + +void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev) +{ + struct pfvf_message msg = { .type = ADF_PF2VF_MSGTYPE_FATAL_ERROR }; + int i, num_vfs = pci_num_vf(accel_to_pci_dev(accel_dev)); + struct adf_accel_vf_info *vf; + + dev_dbg(&GET_DEV(accel_dev), "pf2vf notify fatal error\n"); + for (i = 0, vf = accel_dev->pf.vf_info; i < num_vfs; i++, vf++) { + if (vf->init && vf->vf_compat_ver >= ADF_PFVF_COMPAT_FALLBACK && + adf_send_pf2vf_msg(accel_dev, i, msg)) + dev_err(&GET_DEV(accel_dev), + "Failed to send fatal error msg to VF%d\n", i); } } diff --git 
a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h index e8982d1ac8..f203d88c91 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_msg.h @@ -5,7 +5,28 @@ #include "adf_accel_devices.h" +#if defined(CONFIG_PCI_IOV) void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev); +void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev); +void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev); +void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev); +#else +static inline void adf_pf2vf_notify_restarting(struct adf_accel_dev *accel_dev) +{ +} + +static inline void adf_pf2vf_wait_for_restarting_complete(struct adf_accel_dev *accel_dev) +{ +} + +static inline void adf_pf2vf_notify_restarted(struct adf_accel_dev *accel_dev) +{ +} + +static inline void adf_pf2vf_notify_fatal_error(struct adf_accel_dev *accel_dev) +{ +} +#endif typedef int (*adf_pf2vf_blkmsg_provider)(struct adf_accel_dev *accel_dev, u8 *buffer, u8 compat); diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c index 388e58bcbc..b9b5e744a3 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_pf_proto.c @@ -242,13 +242,7 @@ static int adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr, "VersionRequest received from VF%d (vers %d) to PF (vers %d)\n", vf_nr, vf_compat_ver, ADF_PFVF_COMPAT_THIS_VERSION); - if (vf_compat_ver == 0) - compat = ADF_PF2VF_VF_INCOMPATIBLE; - else if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION) - compat = ADF_PF2VF_VF_COMPATIBLE; - else - compat = ADF_PF2VF_VF_COMPAT_UNKNOWN; - + compat = adf_vf_compat_checker(vf_compat_ver); vf_info->vf_compat_ver = vf_compat_ver; resp->type = ADF_PF2VF_MSGTYPE_VERSION_RESP; @@ -291,6 +285,14 @@ static int 
adf_handle_vf2pf_msg(struct adf_accel_dev *accel_dev, u8 vf_nr, vf_info->init = false; } break; + case ADF_VF2PF_MSGTYPE_RESTARTING_COMPLETE: + { + dev_dbg(&GET_DEV(accel_dev), + "Restarting Complete received from VF%d\n", vf_nr); + vf_info->restarting = false; + vf_info->init = false; + } + break; case ADF_VF2PF_MSGTYPE_LARGE_BLOCK_REQ: case ADF_VF2PF_MSGTYPE_MEDIUM_BLOCK_REQ: case ADF_VF2PF_MSGTYPE_SMALL_BLOCK_REQ: diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h b/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h index 2be048e228..1a044297d8 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_utils.h @@ -28,4 +28,15 @@ u32 adf_pfvf_csr_msg_of(struct adf_accel_dev *accel_dev, struct pfvf_message msg struct pfvf_message adf_pfvf_message_of(struct adf_accel_dev *accel_dev, u32 raw_msg, const struct pfvf_csr_format *fmt); +static inline u8 adf_vf_compat_checker(u8 vf_compat_ver) +{ + if (vf_compat_ver == 0) + return ADF_PF2VF_VF_INCOMPATIBLE; + + if (vf_compat_ver <= ADF_PFVF_COMPAT_THIS_VERSION) + return ADF_PF2VF_VF_COMPATIBLE; + + return ADF_PF2VF_VF_COMPAT_UNKNOWN; +} + #endif /* ADF_PFVF_UTILS_H */ diff --git a/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c b/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c index 1015155b63..dc284a089c 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c +++ b/drivers/crypto/intel/qat/qat_common/adf_pfvf_vf_proto.c @@ -308,6 +308,12 @@ static bool adf_handle_pf2vf_msg(struct adf_accel_dev *accel_dev, adf_pf2vf_handle_pf_restarting(accel_dev); return false; + case ADF_PF2VF_MSGTYPE_RESTARTED: + dev_dbg(&GET_DEV(accel_dev), "Restarted message received from PF\n"); + return true; + case ADF_PF2VF_MSGTYPE_FATAL_ERROR: + dev_err(&GET_DEV(accel_dev), "Fatal error received from PF\n"); + return true; case ADF_PF2VF_MSGTYPE_VERSION_RESP: case ADF_PF2VF_MSGTYPE_BLKMSG_RESP: case ADF_PF2VF_MSGTYPE_RP_RESET_RESP: diff --git 
a/drivers/crypto/intel/qat/qat_common/adf_rl.c b/drivers/crypto/intel/qat/qat_common/adf_rl.c index e10f0024f4..346ef8bee9 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.c +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.c @@ -183,14 +183,14 @@ static enum adf_cfg_service_type srv_to_cfg_svc_type(enum adf_base_services rl_s } /** - * get_sla_arr_of_type() - Returns a pointer to SLA type specific array + * adf_rl_get_sla_arr_of_type() - Returns a pointer to SLA type specific array * @rl_data: pointer to ratelimiting data * @type: SLA type * @sla_arr: pointer to variable where requested pointer will be stored * * Return: Max number of elements allowed for the returned array */ -static u32 get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, +u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, struct rl_sla ***sla_arr) { switch (type) { @@ -778,7 +778,7 @@ static void clear_sla(struct adf_rl *rl_data, struct rl_sla *sla) rp_in_use[sla->ring_pairs_ids[i]] = false; update_budget(sla, old_cir, true); - get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); + adf_rl_get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); assign_node_to_parent(rl_data->accel_dev, sla, true); adf_rl_send_admin_delete_msg(rl_data->accel_dev, node_id, sla->type); mark_rps_usage(sla, rl_data->rp_in_use, false); @@ -875,7 +875,7 @@ static int add_update_sla(struct adf_accel_dev *accel_dev, if (!is_update) { mark_rps_usage(sla, rl_data->rp_in_use, true); - get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); + adf_rl_get_sla_arr_of_type(rl_data, sla->type, &sla_type_arr); sla_type_arr[sla->node_id] = sla; rl_data->sla[sla->sla_id] = sla; } @@ -1065,7 +1065,7 @@ void adf_rl_remove_sla_all(struct adf_accel_dev *accel_dev, bool incl_default) /* Unregister and remove all SLAs */ for (j = RL_LEAF; j >= end_type; j--) { - max_id = get_sla_arr_of_type(rl_data, j, &sla_type_arr); + max_id = adf_rl_get_sla_arr_of_type(rl_data, j, &sla_type_arr); for 
(i = 0; i < max_id; i++) { if (!sla_type_arr[i]) diff --git a/drivers/crypto/intel/qat/qat_common/adf_rl.h b/drivers/crypto/intel/qat/qat_common/adf_rl.h index 269c6656fb..bfe750ea0e 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_rl.h +++ b/drivers/crypto/intel/qat/qat_common/adf_rl.h @@ -151,6 +151,8 @@ struct rl_sla { u16 ring_pairs_cnt; }; +u32 adf_rl_get_sla_arr_of_type(struct adf_rl *rl_data, enum rl_node_type type, + struct rl_sla ***sla_arr); int adf_rl_add_sla(struct adf_accel_dev *accel_dev, struct adf_rl_sla_input_data *sla_in); int adf_rl_update_sla(struct adf_accel_dev *accel_dev, diff --git a/drivers/crypto/intel/qat/qat_common/adf_sriov.c b/drivers/crypto/intel/qat/qat_common/adf_sriov.c index f44025bb6f..8d645e7e04 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sriov.c @@ -26,10 +26,12 @@ static void adf_iov_send_resp(struct work_struct *work) u32 vf_nr = vf_info->vf_nr; bool ret; + mutex_lock(&vf_info->pfvf_mig_lock); ret = adf_recv_and_handle_vf2pf_msg(accel_dev, vf_nr); if (ret) /* re-enable interrupt on PF from this VF */ adf_enable_vf2pf_interrupts(accel_dev, 1 << vf_nr); + mutex_unlock(&vf_info->pfvf_mig_lock); kfree(pf2vf_resp); } @@ -60,9 +62,9 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) /* This ptr will be populated when VFs will be created */ vf_info->accel_dev = accel_dev; vf_info->vf_nr = i; - vf_info->vf_compat_ver = 0; mutex_init(&vf_info->pf2vf_lock); + mutex_init(&vf_info->pfvf_mig_lock); ratelimit_state_init(&vf_info->vf2pf_ratelimit, ADF_VF2PF_RATELIMIT_INTERVAL, ADF_VF2PF_RATELIMIT_BURST); @@ -84,6 +86,32 @@ static int adf_enable_sriov(struct adf_accel_dev *accel_dev) return pci_enable_sriov(pdev, totalvfs); } +void adf_reenable_sriov(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + char cfg[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; + unsigned long val = 0; + + if (adf_cfg_get_param_value(accel_dev, 
ADF_GENERAL_SEC, + ADF_SRIOV_ENABLED, cfg)) + return; + + if (!accel_dev->pf.vf_info) + return; + + if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC)) + return; + + if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC)) + return; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + dev_dbg(&pdev->dev, "Re-enabling SRIOV\n"); + adf_enable_sriov(accel_dev); +} + /** * adf_disable_sriov() - Disable SRIOV for the device * @accel_dev: Pointer to accel device. @@ -103,6 +131,7 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) return; adf_pf2vf_notify_restarting(accel_dev); + adf_pf2vf_wait_for_restarting_complete(accel_dev); pci_disable_sriov(accel_to_pci_dev(accel_dev)); /* Disable VF to PF interrupts */ @@ -112,11 +141,15 @@ void adf_disable_sriov(struct adf_accel_dev *accel_dev) if (hw_data->configure_iov_threads) hw_data->configure_iov_threads(accel_dev, false); - for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) + for (i = 0, vf = accel_dev->pf.vf_info; i < totalvfs; i++, vf++) { mutex_destroy(&vf->pf2vf_lock); + mutex_destroy(&vf->pfvf_mig_lock); + } - kfree(accel_dev->pf.vf_info); - accel_dev->pf.vf_info = NULL; + if (!test_bit(ADF_STATUS_RESTARTING, &accel_dev->status)) { + kfree(accel_dev->pf.vf_info); + accel_dev->pf.vf_info = NULL; + } } EXPORT_SYMBOL_GPL(adf_disable_sriov); @@ -194,6 +227,10 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) if (ret) return ret; + val = 1; + adf_cfg_add_key_value_param(accel_dev, ADF_GENERAL_SEC, ADF_SRIOV_ENABLED, + &val, ADF_DEC); + return numvfs; } EXPORT_SYMBOL_GPL(adf_sriov_configure); diff --git a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c index d450dad32c..4e7f70d404 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_sysfs.c +++ b/drivers/crypto/intel/qat/qat_common/adf_sysfs.c @@ -204,6 +204,42 @@ static ssize_t pm_idle_enabled_store(struct device *dev, 
struct device_attribute } static DEVICE_ATTR_RW(pm_idle_enabled); +static ssize_t auto_reset_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + char *auto_reset; + struct adf_accel_dev *accel_dev; + + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + + auto_reset = accel_dev->autoreset_on_error ? "on" : "off"; + + return sysfs_emit(buf, "%s\n", auto_reset); +} + +static ssize_t auto_reset_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct adf_accel_dev *accel_dev; + bool enabled = false; + int ret; + + ret = kstrtobool(buf, &enabled); + if (ret) + return ret; + + accel_dev = adf_devmgr_pci_to_accel_dev(to_pci_dev(dev)); + if (!accel_dev) + return -EINVAL; + + accel_dev->autoreset_on_error = enabled; + + return count; +} +static DEVICE_ATTR_RW(auto_reset); + static DEVICE_ATTR_RW(state); static DEVICE_ATTR_RW(cfg_services); @@ -291,6 +327,7 @@ static struct attribute *qat_attrs[] = { &dev_attr_pm_idle_enabled.attr, &dev_attr_rp2srv.attr, &dev_attr_num_rps.attr, + &dev_attr_auto_reset.attr, NULL, }; diff --git a/drivers/crypto/intel/qat/qat_common/adf_transport.c b/drivers/crypto/intel/qat/qat_common/adf_transport.c index 630d0483c4..1efdf46490 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_transport.c +++ b/drivers/crypto/intel/qat/qat_common/adf_transport.c @@ -474,7 +474,6 @@ err: int adf_init_etr_data(struct adf_accel_dev *accel_dev) { struct adf_etr_data *etr_data; - struct adf_hw_device_data *hw_data = accel_dev->hw_device; void __iomem *csr_addr; u32 size; u32 num_banks = 0; @@ -495,8 +494,7 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev) } accel_dev->transport = etr_data; - i = hw_data->get_etr_bar_id(hw_data); - csr_addr = accel_dev->accel_pci_dev.pci_bars[i].virt_addr; + csr_addr = adf_get_etr_base(accel_dev); /* accel_dev->debugfs_dir should always be non-NULL here */ etr_data->debug = debugfs_create_dir("transport", diff 
--git a/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c b/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c index b05c3957a1..cdbb2d687b 100644 --- a/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/intel/qat/qat_common/adf_vf_isr.c @@ -293,8 +293,6 @@ EXPORT_SYMBOL_GPL(adf_flush_vf_wq); /** * adf_init_vf_wq() - Init workqueue for VF * - * Function init workqueue 'adf_vf_stop_wq' for VF. - * * Return: 0 on success, error code otherwise. */ int __init adf_init_vf_wq(void) diff --git a/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c b/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c index 4128200a90..85c682e248 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/intel/qat/qat_common/qat_asym_algs.c @@ -110,6 +110,8 @@ struct qat_dh_ctx { unsigned int p_size; bool g2; struct qat_crypto_instance *inst; + struct crypto_kpp *ftfm; + bool fallback; } __packed __aligned(64); struct qat_asym_request { @@ -381,6 +383,36 @@ unmap_src: return ret; } +static int qat_dh_generate_public_key(struct kpp_request *req) +{ + struct kpp_request *nreq = kpp_request_ctx(req); + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + if (ctx->fallback) { + memcpy(nreq, req, sizeof(*req)); + kpp_request_set_tfm(nreq, ctx->ftfm); + return crypto_kpp_generate_public_key(nreq); + } + + return qat_dh_compute_value(req); +} + +static int qat_dh_compute_shared_secret(struct kpp_request *req) +{ + struct kpp_request *nreq = kpp_request_ctx(req); + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + if (ctx->fallback) { + memcpy(nreq, req, sizeof(*req)); + kpp_request_set_tfm(nreq, ctx->ftfm); + return crypto_kpp_compute_shared_secret(nreq); + } + + return qat_dh_compute_value(req); +} + static int qat_dh_check_params_length(unsigned int p_len) { switch (p_len) { @@ -398,9 +430,6 @@ static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh 
*params) struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - if (qat_dh_check_params_length(params->p_size << 3)) - return -EINVAL; - ctx->p_size = params->p_size; ctx->p = dma_alloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL); if (!ctx->p) @@ -454,6 +483,13 @@ static int qat_dh_set_secret(struct crypto_kpp *tfm, const void *buf, if (crypto_dh_decode_key(buf, len, &params) < 0) return -EINVAL; + if (qat_dh_check_params_length(params.p_size << 3)) { + ctx->fallback = true; + return crypto_kpp_set_secret(ctx->ftfm, buf, len); + } + + ctx->fallback = false; + /* Free old secret if any */ qat_dh_clear_ctx(dev, ctx); @@ -481,6 +517,9 @@ static unsigned int qat_dh_max_size(struct crypto_kpp *tfm) { struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + if (ctx->fallback) + return crypto_kpp_maxsize(ctx->ftfm); + return ctx->p_size; } @@ -489,11 +528,22 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm) struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); struct qat_crypto_instance *inst = qat_crypto_get_instance_node(numa_node_id()); + const char *alg = kpp_alg_name(tfm); + unsigned int reqsize; if (!inst) return -EINVAL; - kpp_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64); + ctx->ftfm = crypto_alloc_kpp(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->ftfm)) + return PTR_ERR(ctx->ftfm); + + crypto_kpp_set_flags(ctx->ftfm, crypto_kpp_get_flags(tfm)); + + reqsize = max(sizeof(struct qat_asym_request) + 64, + sizeof(struct kpp_request) + crypto_kpp_reqsize(ctx->ftfm)); + + kpp_set_reqsize(tfm, reqsize); ctx->p_size = 0; ctx->g2 = false; @@ -506,6 +556,9 @@ static void qat_dh_exit_tfm(struct crypto_kpp *tfm) struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); struct device *dev = &GET_DEV(ctx->inst->accel_dev); + if (ctx->ftfm) + crypto_free_kpp(ctx->ftfm); + qat_dh_clear_ctx(dev, ctx); qat_crypto_put_instance(ctx->inst); } @@ -1265,8 +1318,8 @@ static struct akcipher_alg rsa = { static struct kpp_alg dh = { .set_secret = 
qat_dh_set_secret, - .generate_public_key = qat_dh_compute_value, - .compute_shared_secret = qat_dh_compute_value, + .generate_public_key = qat_dh_generate_public_key, + .compute_shared_secret = qat_dh_compute_shared_secret, .max_size = qat_dh_max_size, .init = qat_dh_init_tfm, .exit = qat_dh_exit_tfm, @@ -1276,6 +1329,7 @@ static struct kpp_alg dh = { .cra_priority = 1000, .cra_module = THIS_MODULE, .cra_ctxsize = sizeof(struct qat_dh_ctx), + .cra_flags = CRYPTO_ALG_NEED_FALLBACK, }, }; diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.c b/drivers/crypto/intel/qat/qat_common/qat_bl.c index 76baed0a76..338acf29c4 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_bl.c +++ b/drivers/crypto/intel/qat/qat_common/qat_bl.c @@ -81,7 +81,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (unlikely(!bufl)) return -ENOMEM; } else { - bufl = &buf->sgl_src.sgl_hdr; + bufl = container_of(&buf->sgl_src.sgl_hdr, + struct qat_alg_buf_list, hdr); memset(bufl, 0, sizeof(struct qat_alg_buf_list)); buf->sgl_src_valid = true; } @@ -139,7 +140,8 @@ static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, if (unlikely(!buflout)) goto err_in; } else { - buflout = &buf->sgl_dst.sgl_hdr; + buflout = container_of(&buf->sgl_dst.sgl_hdr, + struct qat_alg_buf_list, hdr); memset(buflout, 0, sizeof(struct qat_alg_buf_list)); buf->sgl_dst_valid = true; } diff --git a/drivers/crypto/intel/qat/qat_common/qat_bl.h b/drivers/crypto/intel/qat/qat_common/qat_bl.h index d87e4f35ac..85bc32a9ec 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_bl.h +++ b/drivers/crypto/intel/qat/qat_common/qat_bl.h @@ -15,14 +15,17 @@ struct qat_alg_buf { } __packed; struct qat_alg_buf_list { - u64 resrvd; - u32 num_bufs; - u32 num_mapped_bufs; + /* New members must be added within the __struct_group() macro below. 
*/ + __struct_group(qat_alg_buf_list_hdr, hdr, __packed, + u64 resrvd; + u32 num_bufs; + u32 num_mapped_bufs; + ); struct qat_alg_buf buffers[]; } __packed; struct qat_alg_fixed_buf_list { - struct qat_alg_buf_list sgl_hdr; + struct qat_alg_buf_list_hdr sgl_hdr; struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; } __packed __aligned(64); diff --git a/drivers/crypto/intel/qat/qat_common/qat_crypto.c b/drivers/crypto/intel/qat/qat_common/qat_crypto.c index 40c8e74d1c..101c6ea416 100644 --- a/drivers/crypto/intel/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/intel/qat/qat_common/qat_crypto.c @@ -105,8 +105,8 @@ struct qat_crypto_instance *qat_crypto_get_instance_node(int node) } /** - * qat_crypto_vf_dev_config() - * create dev config required to create crypto inst. + * qat_crypto_vf_dev_config() - create dev config required to create + * crypto inst. * * @accel_dev: Pointer to acceleration device. * diff --git a/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c b/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c new file mode 100644 index 0000000000..892c2283a5 --- /dev/null +++ b/drivers/crypto/intel/qat/qat_common/qat_mig_dev.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2024 Intel Corporation */ +#include <linux/dev_printk.h> +#include <linux/export.h> +#include <linux/pci.h> +#include <linux/types.h> +#include <linux/qat/qat_mig_dev.h> +#include "adf_accel_devices.h" +#include "adf_common_drv.h" + +struct qat_mig_dev *qat_vfmig_create(struct pci_dev *pdev, int vf_id) +{ + struct adf_accel_dev *accel_dev; + struct qat_migdev_ops *ops; + struct qat_mig_dev *mdev; + + accel_dev = adf_devmgr_pci_to_accel_dev(pdev); + if (!accel_dev) + return ERR_PTR(-ENODEV); + + ops = GET_VFMIG_OPS(accel_dev); + if (!ops || !ops->init || !ops->cleanup || !ops->reset || !ops->open || + !ops->close || !ops->suspend || !ops->resume || !ops->save_state || + !ops->load_state || !ops->save_setup || !ops->load_setup) + return ERR_PTR(-EINVAL); + + mdev = 
kmalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(-ENOMEM); + + mdev->vf_id = vf_id; + mdev->parent_accel_dev = accel_dev; + + return mdev; +} +EXPORT_SYMBOL_GPL(qat_vfmig_create); + +int qat_vfmig_init(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->init(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_init); + +void qat_vfmig_cleanup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->cleanup(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_cleanup); + +void qat_vfmig_reset(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->reset(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_reset); + +int qat_vfmig_open(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->open(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_open); + +void qat_vfmig_close(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + GET_VFMIG_OPS(accel_dev)->close(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_close); + +int qat_vfmig_suspend(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->suspend(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_suspend); + +int qat_vfmig_resume(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->resume(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_resume); + +int qat_vfmig_save_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->save_state(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_save_state); + +int qat_vfmig_save_setup(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return 
GET_VFMIG_OPS(accel_dev)->save_setup(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_save_setup); + +int qat_vfmig_load_state(struct qat_mig_dev *mdev) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->load_state(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_load_state); + +int qat_vfmig_load_setup(struct qat_mig_dev *mdev, int size) +{ + struct adf_accel_dev *accel_dev = mdev->parent_accel_dev; + + return GET_VFMIG_OPS(accel_dev)->load_setup(mdev, size); +} +EXPORT_SYMBOL_GPL(qat_vfmig_load_setup); + +void qat_vfmig_destroy(struct qat_mig_dev *mdev) +{ + kfree(mdev); +} +EXPORT_SYMBOL_GPL(qat_vfmig_destroy); diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/Makefile b/drivers/crypto/intel/qat/qat_dh895xcc/Makefile index 38d6f8e162..cfd3bd7577 100644 --- a/drivers/crypto/intel/qat/qat_dh895xcc/Makefile +++ b/drivers/crypto/intel/qat/qat_dh895xcc/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/../qat_common +ccflags-y := -I $(src)/../qat_common obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCC) += qat_dh895xcc.o qat_dh895xcc-objs := adf_drv.o adf_dh895xcc_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index af14090cc4..6e24d57e6b 100644 --- a/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/intel/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -5,6 +5,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include "adf_dh895xcc_hw_data.h" diff --git a/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile b/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile index 0153c85ce7..64b54e92b2 100644 --- a/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile +++ b/drivers/crypto/intel/qat/qat_dh895xccvf/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: 
GPL-2.0-only -ccflags-y := -I $(srctree)/$(src)/../qat_common +ccflags-y := -I $(src)/../qat_common obj-$(CONFIG_CRYPTO_DEV_QAT_DH895xCCVF) += qat_dh895xccvf.o qat_dh895xccvf-objs := adf_drv.o adf_dh895xccvf_hw_data.o diff --git a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index 70e56cc16e..f4ee4c2e00 100644 --- a/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/intel/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -4,6 +4,7 @@ #include <adf_common_drv.h> #include <adf_gen2_config.h> #include <adf_gen2_dc.h> +#include <adf_gen2_hw_csr_data.h> #include <adf_gen2_hw_data.h> #include <adf_gen2_pfvf.h> #include <adf_pfvf_vf_msg.h> diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c index d2b8d26db9..215a1a8ba7 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_devlink.c @@ -4,7 +4,8 @@ #include "otx2_cpt_devlink.h" static int otx2_cpt_dl_egrp_create(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl); struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; @@ -13,7 +14,8 @@ static int otx2_cpt_dl_egrp_create(struct devlink *dl, u32 id, } static int otx2_cpt_dl_egrp_delete(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct otx2_cpt_devlink *cpt_dl = devlink_priv(dl); struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; @@ -45,7 +47,8 @@ static int otx2_cpt_dl_t106_mode_get(struct devlink *dl, u32 id, } static int otx2_cpt_dl_t106_mode_set(struct devlink *dl, u32 id, - struct devlink_param_gset_ctx *ctx) + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) { struct otx2_cpt_devlink *cpt_dl 
= devlink_priv(dl); struct otx2_cptpf_dev *cptpf = cpt_dl->cptpf; diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 2b3ebe0db3..c82775dbb5 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -15,6 +15,7 @@ #include <linux/platform_device.h> #include <linux/stmp_device.h> #include <linux/clk.h> +#include <soc/fsl/dcp.h> #include <crypto/aes.h> #include <crypto/sha1.h> @@ -101,6 +102,7 @@ struct dcp_async_ctx { struct crypto_skcipher *fallback; unsigned int key_len; uint8_t key[AES_KEYSIZE_128]; + bool key_referenced; }; struct dcp_aes_req_ctx { @@ -155,6 +157,7 @@ static struct dcp *global_sdcp; #define MXS_DCP_CONTROL0_HASH_TERM (1 << 13) #define MXS_DCP_CONTROL0_HASH_INIT (1 << 12) #define MXS_DCP_CONTROL0_PAYLOAD_KEY (1 << 11) +#define MXS_DCP_CONTROL0_OTP_KEY (1 << 10) #define MXS_DCP_CONTROL0_CIPHER_ENCRYPT (1 << 8) #define MXS_DCP_CONTROL0_CIPHER_INIT (1 << 9) #define MXS_DCP_CONTROL0_ENABLE_HASH (1 << 6) @@ -168,6 +171,8 @@ static struct dcp *global_sdcp; #define MXS_DCP_CONTROL1_CIPHER_MODE_ECB (0 << 4) #define MXS_DCP_CONTROL1_CIPHER_SELECT_AES128 (0 << 0) +#define MXS_DCP_CONTROL1_KEY_SELECT_SHIFT 8 + static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) { int dma_err; @@ -220,17 +225,21 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, struct skcipher_request *req, int init) { - dma_addr_t key_phys, src_phys, dst_phys; + dma_addr_t key_phys = 0; + dma_addr_t src_phys, dst_phys; struct dcp *sdcp = global_sdcp; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req); + bool key_referenced = actx->key_referenced; int ret; - key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, - 2 * AES_KEYSIZE_128, DMA_TO_DEVICE); - ret = dma_mapping_error(sdcp->dev, key_phys); - if (ret) - return ret; + if (!key_referenced) { + key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, + 2 * 
AES_KEYSIZE_128, DMA_TO_DEVICE); + ret = dma_mapping_error(sdcp->dev, key_phys); + if (ret) + return ret; + } src_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_in_buf, DCP_BUF_SZ, DMA_TO_DEVICE); @@ -255,8 +264,12 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, MXS_DCP_CONTROL0_INTERRUPT | MXS_DCP_CONTROL0_ENABLE_CIPHER; - /* Payload contains the key. */ - desc->control0 |= MXS_DCP_CONTROL0_PAYLOAD_KEY; + if (key_referenced) + /* Set OTP key bit to select the key via KEY_SELECT. */ + desc->control0 |= MXS_DCP_CONTROL0_OTP_KEY; + else + /* Payload contains the key. */ + desc->control0 |= MXS_DCP_CONTROL0_PAYLOAD_KEY; if (rctx->enc) desc->control0 |= MXS_DCP_CONTROL0_CIPHER_ENCRYPT; @@ -270,6 +283,9 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, else desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_CBC; + if (key_referenced) + desc->control1 |= sdcp->coh->aes_key[0] << MXS_DCP_CONTROL1_KEY_SELECT_SHIFT; + desc->next_cmd_addr = 0; desc->source = src_phys; desc->destination = dst_phys; @@ -284,9 +300,9 @@ aes_done_run: err_dst: dma_unmap_single(sdcp->dev, src_phys, DCP_BUF_SZ, DMA_TO_DEVICE); err_src: - dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128, - DMA_TO_DEVICE); - + if (!key_referenced) + dma_unmap_single(sdcp->dev, key_phys, 2 * AES_KEYSIZE_128, + DMA_TO_DEVICE); return ret; } @@ -453,7 +469,7 @@ static int mxs_dcp_aes_enqueue(struct skcipher_request *req, int enc, int ecb) struct dcp_aes_req_ctx *rctx = skcipher_request_ctx(req); int ret; - if (unlikely(actx->key_len != AES_KEYSIZE_128)) + if (unlikely(actx->key_len != AES_KEYSIZE_128 && !actx->key_referenced)) return mxs_dcp_block_fallback(req, enc); rctx->enc = enc; @@ -500,6 +516,7 @@ static int mxs_dcp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, * there can still be an operation in progress. 
*/ actx->key_len = len; + actx->key_referenced = false; if (len == AES_KEYSIZE_128) { memcpy(actx->key, key, len); return 0; @@ -516,6 +533,32 @@ static int mxs_dcp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, return crypto_skcipher_setkey(actx->fallback, key, len); } +static int mxs_dcp_aes_setrefkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int len) +{ + struct dcp_async_ctx *actx = crypto_skcipher_ctx(tfm); + + if (len != DCP_PAES_KEYSIZE) + return -EINVAL; + + switch (key[0]) { + case DCP_PAES_KEY_SLOT0: + case DCP_PAES_KEY_SLOT1: + case DCP_PAES_KEY_SLOT2: + case DCP_PAES_KEY_SLOT3: + case DCP_PAES_KEY_UNIQUE: + case DCP_PAES_KEY_OTP: + memcpy(actx->key, key, len); + actx->key_len = len; + actx->key_referenced = true; + break; + default: + return -EINVAL; + } + + return 0; +} + static int mxs_dcp_aes_fallback_init_tfm(struct crypto_skcipher *tfm) { const char *name = crypto_tfm_alg_name(crypto_skcipher_tfm(tfm)); @@ -539,6 +582,13 @@ static void mxs_dcp_aes_fallback_exit_tfm(struct crypto_skcipher *tfm) crypto_free_skcipher(actx->fallback); } +static int mxs_dcp_paes_init_tfm(struct crypto_skcipher *tfm) +{ + crypto_skcipher_set_reqsize(tfm, sizeof(struct dcp_aes_req_ctx)); + + return 0; +} + /* * Hashing (SHA1/SHA256) */ @@ -889,6 +939,39 @@ static struct skcipher_alg dcp_aes_algs[] = { .ivsize = AES_BLOCK_SIZE, .init = mxs_dcp_aes_fallback_init_tfm, .exit = mxs_dcp_aes_fallback_exit_tfm, + }, { + .base.cra_name = "ecb(paes)", + .base.cra_driver_name = "ecb-paes-dcp", + .base.cra_priority = 401, + .base.cra_alignmask = 15, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct dcp_async_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = DCP_PAES_KEYSIZE, + .max_keysize = DCP_PAES_KEYSIZE, + .setkey = mxs_dcp_aes_setrefkey, + .encrypt = mxs_dcp_aes_ecb_encrypt, + .decrypt = mxs_dcp_aes_ecb_decrypt, + .init = mxs_dcp_paes_init_tfm, + }, { + 
.base.cra_name = "cbc(paes)", + .base.cra_driver_name = "cbc-paes-dcp", + .base.cra_priority = 401, + .base.cra_alignmask = 15, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_INTERNAL, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct dcp_async_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = DCP_PAES_KEYSIZE, + .max_keysize = DCP_PAES_KEYSIZE, + .setkey = mxs_dcp_aes_setrefkey, + .encrypt = mxs_dcp_aes_cbc_encrypt, + .decrypt = mxs_dcp_aes_cbc_decrypt, + .ivsize = AES_BLOCK_SIZE, + .init = mxs_dcp_paes_init_tfm, }, }; diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 7a3083debc..59d472cb11 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -41,7 +41,7 @@ static const char version[] = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; -MODULE_AUTHOR("David S. Miller (davem@davemloft.net)"); +MODULE_AUTHOR("David S. Miller <davem@davemloft.net>"); MODULE_DESCRIPTION("Niagara2 Crypto driver"); MODULE_LICENSE("GPL"); MODULE_VERSION(DRV_MODULE_VERSION); diff --git a/drivers/crypto/nx/nx-842.c b/drivers/crypto/nx/nx-842.c index 2ab90ec10e..82214cde2b 100644 --- a/drivers/crypto/nx/nx-842.c +++ b/drivers/crypto/nx/nx-842.c @@ -251,7 +251,9 @@ int nx842_crypto_compress(struct crypto_tfm *tfm, u8 *dst, unsigned int *dlen) { struct nx842_crypto_ctx *ctx = crypto_tfm_ctx(tfm); - struct nx842_crypto_header *hdr = &ctx->header; + struct nx842_crypto_header *hdr = + container_of(&ctx->header, + struct nx842_crypto_header, hdr); struct nx842_crypto_param p; struct nx842_constraints c = *ctx->driver->constraints; unsigned int groups, hdrsize, h; @@ -490,7 +492,7 @@ int nx842_crypto_decompress(struct crypto_tfm *tfm, } memcpy(&ctx->header, src, hdr_len); - hdr = &ctx->header; + hdr = container_of(&ctx->header, struct nx842_crypto_header, hdr); for (n = 0; n < hdr->groups; n++) { /* ignore applies to last group */ diff --git a/drivers/crypto/nx/nx-842.h 
b/drivers/crypto/nx/nx-842.h index 7590bfb24d..25fa70b211 100644 --- a/drivers/crypto/nx/nx-842.h +++ b/drivers/crypto/nx/nx-842.h @@ -157,9 +157,11 @@ struct nx842_crypto_header_group { } __packed; struct nx842_crypto_header { - __be16 magic; /* NX842_CRYPTO_MAGIC */ - __be16 ignore; /* decompressed end bytes to ignore */ - u8 groups; /* total groups in this header */ + struct_group_tagged(nx842_crypto_header_hdr, hdr, + __be16 magic; /* NX842_CRYPTO_MAGIC */ + __be16 ignore; /* decompressed end bytes to ignore */ + u8 groups; /* total groups in this header */ + ); struct nx842_crypto_header_group group[]; } __packed; @@ -171,7 +173,7 @@ struct nx842_crypto_ctx { u8 *wmem; u8 *sbounce, *dbounce; - struct nx842_crypto_header header; + struct nx842_crypto_header_hdr header; struct nx842_crypto_header_group group[NX842_CRYPTO_GROUP_MAX]; struct nx842_driver *driver; diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 70edf40bc5..f74b3c81ba 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -371,6 +371,11 @@ static int rk_crypto_probe(struct platform_device *pdev) } crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true); + if (!crypto_info->engine) { + err = -ENOMEM; + goto err_crypto; + } + crypto_engine_start(crypto_info->engine); init_completion(&crypto_info->complete); diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 3423b5cde1..96d4af5d48 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -559,7 +559,7 @@ static int sahara_aes_process(struct skcipher_request *req) struct sahara_ctx *ctx; struct sahara_aes_reqctx *rctx; int ret; - unsigned long timeout; + unsigned long time_left; /* Request is ready to be dispatched by the device */ dev_dbg(dev->device, @@ -597,15 +597,15 @@ static int sahara_aes_process(struct skcipher_request *req) if (ret) return -EINVAL; - timeout = 
wait_for_completion_timeout(&dev->dma_completion, - msecs_to_jiffies(SAHARA_TIMEOUT_MS)); + time_left = wait_for_completion_timeout(&dev->dma_completion, + msecs_to_jiffies(SAHARA_TIMEOUT_MS)); dma_unmap_sg(dev->device, dev->out_sg, dev->nb_out_sg, DMA_FROM_DEVICE); dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, DMA_TO_DEVICE); - if (!timeout) { + if (!time_left) { dev_err(dev->device, "AES timeout\n"); return -ETIMEDOUT; } @@ -931,7 +931,7 @@ static int sahara_sha_process(struct ahash_request *req) struct sahara_dev *dev = dev_ptr; struct sahara_sha_reqctx *rctx = ahash_request_ctx(req); int ret; - unsigned long timeout; + unsigned long time_left; ret = sahara_sha_prepare_request(req); if (!ret) @@ -963,14 +963,14 @@ static int sahara_sha_process(struct ahash_request *req) sahara_write(dev, dev->hw_phys_desc[0], SAHARA_REG_DAR); - timeout = wait_for_completion_timeout(&dev->dma_completion, - msecs_to_jiffies(SAHARA_TIMEOUT_MS)); + time_left = wait_for_completion_timeout(&dev->dma_completion, + msecs_to_jiffies(SAHARA_TIMEOUT_MS)); if (rctx->sg_in_idx) dma_unmap_sg(dev->device, dev->in_sg, dev->nb_in_sg, DMA_TO_DEVICE); - if (!timeout) { + if (!time_left) { dev_err(dev->device, "SHA timeout\n"); return -ETIMEDOUT; } diff --git a/drivers/crypto/starfive/Kconfig b/drivers/crypto/starfive/Kconfig index cb59357b58..0fe389e9f9 100644 --- a/drivers/crypto/starfive/Kconfig +++ b/drivers/crypto/starfive/Kconfig @@ -14,6 +14,10 @@ config CRYPTO_DEV_JH7110 select CRYPTO_RSA select CRYPTO_AES select CRYPTO_CCM + select CRYPTO_GCM + select CRYPTO_ECB + select CRYPTO_CBC + select CRYPTO_CTR help Support for StarFive JH7110 crypto hardware acceleration engine. 
This module provides acceleration for public key algo, diff --git a/drivers/crypto/starfive/jh7110-aes.c b/drivers/crypto/starfive/jh7110-aes.c index 1ac15cc4ef..86a1a1fa9f 100644 --- a/drivers/crypto/starfive/jh7110-aes.c +++ b/drivers/crypto/starfive/jh7110-aes.c @@ -78,7 +78,7 @@ static inline int is_gcm(struct starfive_cryp_dev *cryp) return (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM; } -static inline int is_encrypt(struct starfive_cryp_dev *cryp) +static inline bool is_encrypt(struct starfive_cryp_dev *cryp) { return cryp->flags & FLG_ENCRYPT; } @@ -103,16 +103,6 @@ static void starfive_aes_aead_hw_start(struct starfive_cryp_ctx *ctx, u32 hw_mod } } -static inline void starfive_aes_set_ivlen(struct starfive_cryp_ctx *ctx) -{ - struct starfive_cryp_dev *cryp = ctx->cryp; - - if (is_gcm(cryp)) - writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN); - else - writel(AES_BLOCK_SIZE, cryp->base + STARFIVE_AES_IVLEN); -} - static inline void starfive_aes_set_alen(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -261,7 +251,6 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) rctx->csr.aes.mode = hw_mode; rctx->csr.aes.cmode = !is_encrypt(cryp); - rctx->csr.aes.ie = 1; rctx->csr.aes.stmode = STARFIVE_AES_MODE_XFB_1; if (cryp->side_chan) { @@ -279,7 +268,7 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) case STARFIVE_AES_MODE_GCM: starfive_aes_set_alen(ctx); starfive_aes_set_mlen(ctx); - starfive_aes_set_ivlen(ctx); + writel(GCM_AES_IV_SIZE, cryp->base + STARFIVE_AES_IVLEN); starfive_aes_aead_hw_start(ctx, hw_mode); starfive_aes_write_iv(ctx, (void *)cryp->req.areq->iv); break; @@ -300,52 +289,49 @@ static int starfive_aes_hw_init(struct starfive_cryp_ctx *ctx) return cryp->err; } -static int starfive_aes_read_authtag(struct starfive_cryp_dev *cryp) +static int starfive_aes_read_authtag(struct starfive_cryp_ctx *ctx) { - int i, start_addr; + struct starfive_cryp_dev *cryp = ctx->cryp; + 
struct starfive_cryp_request_ctx *rctx = ctx->rctx; + int i; if (starfive_aes_wait_busy(cryp)) return dev_err_probe(cryp->dev, -ETIMEDOUT, "Timeout waiting for tag generation."); - start_addr = STARFIVE_AES_NONCE0; - - if (is_gcm(cryp)) - for (i = 0; i < AES_BLOCK_32; i++, start_addr += 4) - cryp->tag_out[i] = readl(cryp->base + start_addr); - else + if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_GCM) { + cryp->tag_out[0] = readl(cryp->base + STARFIVE_AES_NONCE0); + cryp->tag_out[1] = readl(cryp->base + STARFIVE_AES_NONCE1); + cryp->tag_out[2] = readl(cryp->base + STARFIVE_AES_NONCE2); + cryp->tag_out[3] = readl(cryp->base + STARFIVE_AES_NONCE3); + } else { for (i = 0; i < AES_BLOCK_32; i++) cryp->tag_out[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R); + } if (is_encrypt(cryp)) { - scatterwalk_copychunks(cryp->tag_out, &cryp->out_walk, cryp->authsize, 1); + scatterwalk_map_and_copy(cryp->tag_out, rctx->out_sg, + cryp->total_in, cryp->authsize, 1); } else { - scatterwalk_copychunks(cryp->tag_in, &cryp->in_walk, cryp->authsize, 0); - if (crypto_memneq(cryp->tag_in, cryp->tag_out, cryp->authsize)) - return dev_err_probe(cryp->dev, -EBADMSG, "Failed tag verification\n"); + return -EBADMSG; } return 0; } -static void starfive_aes_finish_req(struct starfive_cryp_dev *cryp) +static void starfive_aes_finish_req(struct starfive_cryp_ctx *ctx) { - union starfive_aes_csr csr; + struct starfive_cryp_dev *cryp = ctx->cryp; int err = cryp->err; if (!err && cryp->authsize) - err = starfive_aes_read_authtag(cryp); + err = starfive_aes_read_authtag(ctx); if (!err && ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CBC || (cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CTR)) starfive_aes_get_iv(cryp, (void *)cryp->req.sreq->iv); - /* reset irq flags*/ - csr.v = 0; - csr.aesrst = 1; - writel(csr.v, cryp->base + STARFIVE_AES_CSR); - if (cryp->authsize) crypto_finalize_aead_request(cryp->engine, cryp->req.areq, err); else @@ -353,39 +339,6 @@ static void 
starfive_aes_finish_req(struct starfive_cryp_dev *cryp) err); } -void starfive_aes_done_task(unsigned long param) -{ - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param; - u32 block[AES_BLOCK_32]; - u32 stat; - int i; - - for (i = 0; i < AES_BLOCK_32; i++) - block[i] = readl(cryp->base + STARFIVE_AES_AESDIO0R); - - scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_out), 1); - - cryp->total_out -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_out); - - if (!cryp->total_out) { - starfive_aes_finish_req(cryp); - return; - } - - memset(block, 0, AES_BLOCK_SIZE); - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); - - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); -} - static int starfive_aes_gcm_write_adata(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -451,60 +404,165 @@ static int starfive_aes_ccm_write_adata(struct starfive_cryp_ctx *ctx) return 0; } -static int starfive_aes_prepare_req(struct skcipher_request *req, - struct aead_request *areq) +static void starfive_aes_dma_done(void *param) { - struct starfive_cryp_ctx *ctx; - struct starfive_cryp_request_ctx *rctx; - struct starfive_cryp_dev *cryp; + struct starfive_cryp_dev *cryp = param; - if (!req && !areq) - return -EINVAL; + complete(&cryp->dma_done); +} - ctx = req ? 
crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)) : - crypto_aead_ctx(crypto_aead_reqtfm(areq)); +static void starfive_aes_dma_init(struct starfive_cryp_dev *cryp) +{ + cryp->cfg_in.direction = DMA_MEM_TO_DEV; + cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; + cryp->cfg_in.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cryp->cfg_in.src_maxburst = cryp->dma_maxburst; + cryp->cfg_in.dst_maxburst = cryp->dma_maxburst; + cryp->cfg_in.dst_addr = cryp->phys_base + STARFIVE_ALG_FIFO_OFFSET; - cryp = ctx->cryp; - rctx = req ? skcipher_request_ctx(req) : aead_request_ctx(areq); + dmaengine_slave_config(cryp->tx, &cryp->cfg_in); - if (req) { - cryp->req.sreq = req; - cryp->total_in = req->cryptlen; - cryp->total_out = req->cryptlen; - cryp->assoclen = 0; - cryp->authsize = 0; - } else { - cryp->req.areq = areq; - cryp->assoclen = areq->assoclen; - cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(areq)); - if (is_encrypt(cryp)) { - cryp->total_in = areq->cryptlen; - cryp->total_out = areq->cryptlen; - } else { - cryp->total_in = areq->cryptlen - cryp->authsize; - cryp->total_out = cryp->total_in; - } - } + cryp->cfg_out.direction = DMA_DEV_TO_MEM; + cryp->cfg_out.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cryp->cfg_out.dst_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; + cryp->cfg_out.src_maxburst = 4; + cryp->cfg_out.dst_maxburst = 4; + cryp->cfg_out.src_addr = cryp->phys_base + STARFIVE_ALG_FIFO_OFFSET; - rctx->in_sg = req ? req->src : areq->src; - scatterwalk_start(&cryp->in_walk, rctx->in_sg); + dmaengine_slave_config(cryp->rx, &cryp->cfg_out); - rctx->out_sg = req ? 
req->dst : areq->dst; - scatterwalk_start(&cryp->out_walk, rctx->out_sg); + init_completion(&cryp->dma_done); +} - if (cryp->assoclen) { - rctx->adata = kzalloc(cryp->assoclen + AES_BLOCK_SIZE, GFP_KERNEL); - if (!rctx->adata) - return dev_err_probe(cryp->dev, -ENOMEM, - "Failed to alloc memory for adata"); +static int starfive_aes_dma_xfer(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst, + int len) +{ + struct dma_async_tx_descriptor *in_desc, *out_desc; + union starfive_alg_cr alg_cr; + int ret = 0, in_save, out_save; + + alg_cr.v = 0; + alg_cr.start = 1; + alg_cr.aes_dma_en = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); + + in_save = sg_dma_len(src); + out_save = sg_dma_len(dst); - scatterwalk_copychunks(rctx->adata, &cryp->in_walk, cryp->assoclen, 0); - scatterwalk_copychunks(NULL, &cryp->out_walk, cryp->assoclen, 2); + writel(ALIGN(len, AES_BLOCK_SIZE), cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); + writel(ALIGN(len, AES_BLOCK_SIZE), cryp->base + STARFIVE_DMA_OUT_LEN_OFFSET); + + sg_dma_len(src) = ALIGN(len, AES_BLOCK_SIZE); + sg_dma_len(dst) = ALIGN(len, AES_BLOCK_SIZE); + + out_desc = dmaengine_prep_slave_sg(cryp->rx, dst, 1, DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!out_desc) { + ret = -EINVAL; + goto dma_err; } - ctx->rctx = rctx; + out_desc->callback = starfive_aes_dma_done; + out_desc->callback_param = cryp; + + reinit_completion(&cryp->dma_done); + dmaengine_submit(out_desc); + dma_async_issue_pending(cryp->rx); + + in_desc = dmaengine_prep_slave_sg(cryp->tx, src, 1, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!in_desc) { + ret = -EINVAL; + goto dma_err; + } + + dmaengine_submit(in_desc); + dma_async_issue_pending(cryp->tx); + + if (!wait_for_completion_timeout(&cryp->dma_done, + msecs_to_jiffies(1000))) + ret = -ETIMEDOUT; + +dma_err: + sg_dma_len(src) = in_save; + sg_dma_len(dst) = out_save; + + alg_cr.v = 0; + alg_cr.clear = 1; + writel(alg_cr.v, cryp->base + 
STARFIVE_ALG_CR_OFFSET); + + return ret; +} + +static int starfive_aes_map_sg(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst) +{ + struct scatterlist *stsg, *dtsg; + struct scatterlist _src[2], _dst[2]; + unsigned int remain = cryp->total_in; + unsigned int len, src_nents, dst_nents; + int ret; + + if (src == dst) { + for (stsg = src, dtsg = dst; remain > 0; + stsg = sg_next(stsg), dtsg = sg_next(dtsg)) { + src_nents = dma_map_sg(cryp->dev, stsg, 1, DMA_BIDIRECTIONAL); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg error\n"); + + dst_nents = src_nents; + len = min(sg_dma_len(stsg), remain); + + ret = starfive_aes_dma_xfer(cryp, stsg, dtsg, len); + dma_unmap_sg(cryp->dev, stsg, 1, DMA_BIDIRECTIONAL); + if (ret) + return ret; + + remain -= len; + } + } else { + for (stsg = src, dtsg = dst;;) { + src_nents = dma_map_sg(cryp->dev, stsg, 1, DMA_TO_DEVICE); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg src error\n"); + + dst_nents = dma_map_sg(cryp->dev, dtsg, 1, DMA_FROM_DEVICE); + if (dst_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg dst error\n"); + + len = min(sg_dma_len(stsg), sg_dma_len(dtsg)); + len = min(len, remain); + + ret = starfive_aes_dma_xfer(cryp, stsg, dtsg, len); + dma_unmap_sg(cryp->dev, stsg, 1, DMA_TO_DEVICE); + dma_unmap_sg(cryp->dev, dtsg, 1, DMA_FROM_DEVICE); + if (ret) + return ret; + + remain -= len; + if (remain == 0) + break; + + if (sg_dma_len(stsg) - len) { + stsg = scatterwalk_ffwd(_src, stsg, len); + dtsg = sg_next(dtsg); + } else if (sg_dma_len(dtsg) - len) { + dtsg = scatterwalk_ffwd(_dst, dtsg, len); + stsg = sg_next(stsg); + } else { + stsg = sg_next(stsg); + dtsg = sg_next(dtsg); + } + } + } - return starfive_aes_hw_init(ctx); + return 0; } static int starfive_aes_do_one_req(struct crypto_engine *engine, void *areq) @@ -513,35 +571,42 @@ static int starfive_aes_do_one_req(struct crypto_engine *engine, void 
*areq) container_of(areq, struct skcipher_request, base); struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct starfive_cryp_request_ctx *rctx = skcipher_request_ctx(req); struct starfive_cryp_dev *cryp = ctx->cryp; - u32 block[AES_BLOCK_32]; - u32 stat; - int err; - int i; + int ret; - err = starfive_aes_prepare_req(req, NULL); - if (err) - return err; + cryp->req.sreq = req; + cryp->total_in = req->cryptlen; + cryp->total_out = req->cryptlen; + cryp->assoclen = 0; + cryp->authsize = 0; - /* - * Write first plain/ciphertext block to start the module - * then let irq tasklet handle the rest of the data blocks. - */ - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); + rctx->in_sg = req->src; + rctx->out_sg = req->dst; + + ctx->rctx = rctx; + + ret = starfive_aes_hw_init(ctx); + if (ret) + return ret; - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); + if (!cryp->total_in) + goto finish_req; - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); + starfive_aes_dma_init(cryp); + + ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg); + if (ret) + return ret; + +finish_req: + starfive_aes_finish_req(ctx); return 0; } -static int starfive_aes_init_tfm(struct crypto_skcipher *tfm) +static int starfive_aes_init_tfm(struct crypto_skcipher *tfm, + const char *alg_name) { struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm); @@ -549,12 +614,26 @@ static int starfive_aes_init_tfm(struct crypto_skcipher *tfm) if (!ctx->cryp) return -ENODEV; + ctx->skcipher_fbk = crypto_alloc_skcipher(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->skcipher_fbk)) + return dev_err_probe(ctx->cryp->dev, PTR_ERR(ctx->skcipher_fbk), + "%s() failed to allocate fallback for %s\n", + __func__, 
alg_name); + crypto_skcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + - sizeof(struct skcipher_request)); + crypto_skcipher_reqsize(ctx->skcipher_fbk)); return 0; } +static void starfive_aes_exit_tfm(struct crypto_skcipher *tfm) +{ + struct starfive_cryp_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->skcipher_fbk); +} + static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq) { struct aead_request *req = @@ -562,79 +641,99 @@ static int starfive_aes_aead_do_one_req(struct crypto_engine *engine, void *areq struct starfive_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); struct starfive_cryp_dev *cryp = ctx->cryp; - struct starfive_cryp_request_ctx *rctx; - u32 block[AES_BLOCK_32]; - u32 stat; - int err; - int i; + struct starfive_cryp_request_ctx *rctx = aead_request_ctx(req); + struct scatterlist _src[2], _dst[2]; + int ret; + + cryp->req.areq = req; + cryp->assoclen = req->assoclen; + cryp->authsize = crypto_aead_authsize(crypto_aead_reqtfm(req)); + + rctx->in_sg = scatterwalk_ffwd(_src, req->src, cryp->assoclen); + if (req->src == req->dst) + rctx->out_sg = rctx->in_sg; + else + rctx->out_sg = scatterwalk_ffwd(_dst, req->dst, cryp->assoclen); + + if (is_encrypt(cryp)) { + cryp->total_in = req->cryptlen; + cryp->total_out = req->cryptlen; + } else { + cryp->total_in = req->cryptlen - cryp->authsize; + cryp->total_out = cryp->total_in; + scatterwalk_map_and_copy(cryp->tag_in, req->src, + cryp->total_in + cryp->assoclen, + cryp->authsize, 0); + } - err = starfive_aes_prepare_req(NULL, req); - if (err) - return err; + if (cryp->assoclen) { + rctx->adata = kzalloc(cryp->assoclen + AES_BLOCK_SIZE, GFP_KERNEL); + if (!rctx->adata) + return dev_err_probe(cryp->dev, -ENOMEM, + "Failed to alloc memory for adata"); + + if (sg_copy_to_buffer(req->src, sg_nents_for_len(req->src, cryp->assoclen), + rctx->adata, cryp->assoclen) != cryp->assoclen) + return -EINVAL; + } + + if (cryp->total_in) + 
sg_zero_buffer(rctx->in_sg, sg_nents(rctx->in_sg), + sg_dma_len(rctx->in_sg) - cryp->total_in, + cryp->total_in); - rctx = ctx->rctx; + ctx->rctx = rctx; + + ret = starfive_aes_hw_init(ctx); + if (ret) + return ret; if (!cryp->assoclen) goto write_text; if ((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM) - cryp->err = starfive_aes_ccm_write_adata(ctx); + ret = starfive_aes_ccm_write_adata(ctx); else - cryp->err = starfive_aes_gcm_write_adata(ctx); + ret = starfive_aes_gcm_write_adata(ctx); kfree(rctx->adata); - if (cryp->err) - return cryp->err; + if (ret) + return ret; write_text: if (!cryp->total_in) goto finish_req; - /* - * Write first plain/ciphertext block to start the module - * then let irq tasklet handle the rest of the data blocks. - */ - scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, AES_BLOCK_SIZE, - cryp->total_in), 0); - cryp->total_in -= min_t(size_t, AES_BLOCK_SIZE, cryp->total_in); - - for (i = 0; i < AES_BLOCK_32; i++) - writel(block[i], cryp->base + STARFIVE_AES_AESDIO0R); - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_AES_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); + starfive_aes_dma_init(cryp); - return 0; + ret = starfive_aes_map_sg(cryp, rctx->in_sg, rctx->out_sg); + if (ret) + return ret; finish_req: - starfive_aes_finish_req(cryp); + starfive_aes_finish_req(ctx); return 0; } -static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm) +static int starfive_aes_aead_init_tfm(struct crypto_aead *tfm, + const char *alg_name) { struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm); - struct starfive_cryp_dev *cryp = ctx->cryp; - struct crypto_tfm *aead = crypto_aead_tfm(tfm); - struct crypto_alg *alg = aead->__crt_alg; ctx->cryp = starfive_cryp_find_dev(ctx); if (!ctx->cryp) return -ENODEV; - if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->aead_fbk = crypto_alloc_aead(alg->cra_name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->aead_fbk)) - return 
dev_err_probe(cryp->dev, PTR_ERR(ctx->aead_fbk), - "%s() failed to allocate fallback for %s\n", - __func__, alg->cra_name); - } + ctx->aead_fbk = crypto_alloc_aead(alg_name, 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->aead_fbk)) + return dev_err_probe(ctx->cryp->dev, PTR_ERR(ctx->aead_fbk), + "%s() failed to allocate fallback for %s\n", + __func__, alg_name); - crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_ctx) + - sizeof(struct aead_request)); + crypto_aead_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + + crypto_aead_reqsize(ctx->aead_fbk)); return 0; } @@ -646,6 +745,46 @@ static void starfive_aes_aead_exit_tfm(struct crypto_aead *tfm) crypto_free_aead(ctx->aead_fbk); } +static bool starfive_aes_check_unaligned(struct starfive_cryp_dev *cryp, + struct scatterlist *src, + struct scatterlist *dst) +{ + struct scatterlist *tsg; + int i; + + for_each_sg(src, tsg, sg_nents(src), i) + if (!IS_ALIGNED(tsg->offset, sizeof(u32)) || + (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && + !sg_is_last(tsg))) + return true; + + if (src != dst) + for_each_sg(dst, tsg, sg_nents(dst), i) + if (!IS_ALIGNED(tsg->offset, sizeof(u32)) || + (!IS_ALIGNED(tsg->length, AES_BLOCK_SIZE) && + !sg_is_last(tsg))) + return true; + + return false; +} + +static int starfive_aes_do_fallback(struct skcipher_request *req, bool enc) +{ + struct starfive_cryp_ctx *ctx = + crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct skcipher_request *subreq = skcipher_request_ctx(req); + + skcipher_request_set_tfm(subreq, ctx->skcipher_fbk); + skcipher_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + + return enc ? 
crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); +} + static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); @@ -660,32 +799,54 @@ static int starfive_aes_crypt(struct skcipher_request *req, unsigned long flags) if (req->cryptlen & blocksize_align) return -EINVAL; + if (starfive_aes_check_unaligned(cryp, req->src, req->dst)) + return starfive_aes_do_fallback(req, is_encrypt(cryp)); + return crypto_transfer_skcipher_request_to_engine(cryp->engine, req); } +static int starfive_aes_aead_do_fallback(struct aead_request *req, bool enc) +{ + struct starfive_cryp_ctx *ctx = + crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct aead_request *subreq = aead_request_ctx(req); + + aead_request_set_tfm(subreq, ctx->aead_fbk); + aead_request_set_callback(subreq, req->base.flags, + req->base.complete, + req->base.data); + aead_request_set_crypt(subreq, req->src, req->dst, + req->cryptlen, req->iv); + aead_request_set_ad(subreq, req->assoclen); + + return enc ? crypto_aead_encrypt(subreq) : + crypto_aead_decrypt(subreq); +} + static int starfive_aes_aead_crypt(struct aead_request *req, unsigned long flags) { struct starfive_cryp_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); struct starfive_cryp_dev *cryp = ctx->cryp; + struct scatterlist *src, *dst, _src[2], _dst[2]; cryp->flags = flags; - /* - * HW engine could not perform CCM tag verification on - * non-blocksize aligned text, use fallback algo instead + /* aes-ccm does not support tag verification for non-aligned text, + * use fallback for ccm decryption instead. 
*/ - if (ctx->aead_fbk && !is_encrypt(cryp)) { - struct aead_request *subreq = aead_request_ctx(req); + if (((cryp->flags & FLG_MODE_MASK) == STARFIVE_AES_MODE_CCM) && + !is_encrypt(cryp)) + return starfive_aes_aead_do_fallback(req, 0); - aead_request_set_tfm(subreq, ctx->aead_fbk); - aead_request_set_callback(subreq, req->base.flags, - req->base.complete, req->base.data); - aead_request_set_crypt(subreq, req->src, - req->dst, req->cryptlen, req->iv); - aead_request_set_ad(subreq, req->assoclen); + src = scatterwalk_ffwd(_src, req->src, req->assoclen); - return crypto_aead_decrypt(subreq); - } + if (req->src == req->dst) + dst = src; + else + dst = scatterwalk_ffwd(_dst, req->dst, req->assoclen); + + if (starfive_aes_check_unaligned(cryp, src, dst)) + return starfive_aes_aead_do_fallback(req, is_encrypt(cryp)); return crypto_transfer_aead_request_to_engine(cryp->engine, req); } @@ -706,7 +867,7 @@ static int starfive_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, memcpy(ctx->key, key, keylen); ctx->keylen = keylen; - return 0; + return crypto_skcipher_setkey(ctx->skcipher_fbk, key, keylen); } static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key, @@ -725,16 +886,20 @@ static int starfive_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key, memcpy(ctx->key, key, keylen); ctx->keylen = keylen; - if (ctx->aead_fbk) - return crypto_aead_setkey(ctx->aead_fbk, key, keylen); - - return 0; + return crypto_aead_setkey(ctx->aead_fbk, key, keylen); } static int starfive_aes_gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { - return crypto_gcm_check_authsize(authsize); + struct starfive_cryp_ctx *ctx = crypto_aead_ctx(tfm); + int ret; + + ret = crypto_gcm_check_authsize(authsize); + if (ret) + return ret; + + return crypto_aead_setauthsize(ctx->aead_fbk, authsize); } static int starfive_aes_ccm_setauthsize(struct crypto_aead *tfm, @@ -820,9 +985,35 @@ static int starfive_aes_ccm_decrypt(struct aead_request *req) return 
starfive_aes_aead_crypt(req, STARFIVE_AES_MODE_CCM); } +static int starfive_aes_ecb_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "ecb(aes-generic)"); +} + +static int starfive_aes_cbc_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "cbc(aes-generic)"); +} + +static int starfive_aes_ctr_init_tfm(struct crypto_skcipher *tfm) +{ + return starfive_aes_init_tfm(tfm, "ctr(aes-generic)"); +} + +static int starfive_aes_ccm_init_tfm(struct crypto_aead *tfm) +{ + return starfive_aes_aead_init_tfm(tfm, "ccm_base(ctr(aes-generic),cbcmac(aes-generic))"); +} + +static int starfive_aes_gcm_init_tfm(struct crypto_aead *tfm) +{ + return starfive_aes_aead_init_tfm(tfm, "gcm_base(ctr(aes-generic),ghash-generic)"); +} + static struct skcipher_engine_alg skcipher_algs[] = { { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_ecb_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_ecb_encrypt, .base.decrypt = starfive_aes_ecb_decrypt, @@ -832,7 +1023,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "ecb(aes)", .cra_driver_name = "starfive-ecb-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -842,7 +1034,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .do_one_request = starfive_aes_do_one_req, }, }, { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_cbc_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_cbc_encrypt, .base.decrypt = starfive_aes_cbc_decrypt, @@ -853,7 +1046,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "cbc(aes)", .cra_driver_name = "starfive-cbc-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = 
CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -863,7 +1057,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .do_one_request = starfive_aes_do_one_req, }, }, { - .base.init = starfive_aes_init_tfm, + .base.init = starfive_aes_ctr_init_tfm, + .base.exit = starfive_aes_exit_tfm, .base.setkey = starfive_aes_setkey, .base.encrypt = starfive_aes_ctr_encrypt, .base.decrypt = starfive_aes_ctr_decrypt, @@ -874,7 +1069,8 @@ static struct skcipher_engine_alg skcipher_algs[] = { .cra_name = "ctr(aes)", .cra_driver_name = "starfive-ctr-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -892,7 +1088,7 @@ static struct aead_engine_alg aead_algs[] = { .base.setauthsize = starfive_aes_gcm_setauthsize, .base.encrypt = starfive_aes_gcm_encrypt, .base.decrypt = starfive_aes_gcm_decrypt, - .base.init = starfive_aes_aead_init_tfm, + .base.init = starfive_aes_gcm_init_tfm, .base.exit = starfive_aes_aead_exit_tfm, .base.ivsize = GCM_AES_IV_SIZE, .base.maxauthsize = AES_BLOCK_SIZE, @@ -900,7 +1096,8 @@ static struct aead_engine_alg aead_algs[] = { .cra_name = "gcm(aes)", .cra_driver_name = "starfive-gcm-aes", .cra_priority = 200, - .cra_flags = CRYPTO_ALG_ASYNC, + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = 1, .cra_ctxsize = sizeof(struct starfive_cryp_ctx), .cra_alignmask = 0xf, @@ -914,7 +1111,7 @@ static struct aead_engine_alg aead_algs[] = { .base.setauthsize = starfive_aes_ccm_setauthsize, .base.encrypt = starfive_aes_ccm_encrypt, .base.decrypt = starfive_aes_ccm_decrypt, - .base.init = starfive_aes_aead_init_tfm, + .base.init = starfive_aes_ccm_init_tfm, .base.exit = starfive_aes_aead_exit_tfm, .base.ivsize = AES_BLOCK_SIZE, .base.maxauthsize = AES_BLOCK_SIZE, diff --git 
a/drivers/crypto/starfive/jh7110-cryp.c b/drivers/crypto/starfive/jh7110-cryp.c index 425fddf3a8..e4dfed7ee0 100644 --- a/drivers/crypto/starfive/jh7110-cryp.c +++ b/drivers/crypto/starfive/jh7110-cryp.c @@ -89,34 +89,10 @@ static void starfive_dma_cleanup(struct starfive_cryp_dev *cryp) dma_release_channel(cryp->rx); } -static irqreturn_t starfive_cryp_irq(int irq, void *priv) -{ - u32 status; - u32 mask; - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)priv; - - mask = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - status = readl(cryp->base + STARFIVE_IE_FLAG_OFFSET); - if (status & STARFIVE_IE_FLAG_AES_DONE) { - mask |= STARFIVE_IE_MASK_AES_DONE; - writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET); - tasklet_schedule(&cryp->aes_done); - } - - if (status & STARFIVE_IE_FLAG_HASH_DONE) { - mask |= STARFIVE_IE_MASK_HASH_DONE; - writel(mask, cryp->base + STARFIVE_IE_MASK_OFFSET); - tasklet_schedule(&cryp->hash_done); - } - - return IRQ_HANDLED; -} - static int starfive_cryp_probe(struct platform_device *pdev) { struct starfive_cryp_dev *cryp; struct resource *res; - int irq; int ret; cryp = devm_kzalloc(&pdev->dev, sizeof(*cryp), GFP_KERNEL); @@ -131,9 +107,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(cryp->base), "Error remapping memory for platform device\n"); - tasklet_init(&cryp->aes_done, starfive_aes_done_task, (unsigned long)cryp); - tasklet_init(&cryp->hash_done, starfive_hash_done_task, (unsigned long)cryp); - cryp->phys_base = res->start; cryp->dma_maxburst = 32; cryp->side_chan = side_chan; @@ -153,16 +126,6 @@ static int starfive_cryp_probe(struct platform_device *pdev) return dev_err_probe(&pdev->dev, PTR_ERR(cryp->rst), "Error getting hardware reset line\n"); - irq = platform_get_irq(pdev, 0); - if (irq < 0) - return irq; - - ret = devm_request_irq(&pdev->dev, irq, starfive_cryp_irq, 0, pdev->name, - (void *)cryp); - if (ret) - return dev_err_probe(&pdev->dev, ret, - "Failed to 
register interrupt handler\n"); - clk_prepare_enable(cryp->hclk); clk_prepare_enable(cryp->ahb); reset_control_deassert(cryp->rst); @@ -219,9 +182,6 @@ err_dma_init: clk_disable_unprepare(cryp->ahb); reset_control_assert(cryp->rst); - tasklet_kill(&cryp->aes_done); - tasklet_kill(&cryp->hash_done); - return ret; } @@ -233,9 +193,6 @@ static void starfive_cryp_remove(struct platform_device *pdev) starfive_hash_unregister_algs(); starfive_rsa_unregister_algs(); - tasklet_kill(&cryp->aes_done); - tasklet_kill(&cryp->hash_done); - crypto_engine_stop(cryp->engine); crypto_engine_exit(cryp->engine); diff --git a/drivers/crypto/starfive/jh7110-cryp.h b/drivers/crypto/starfive/jh7110-cryp.h index 6cdf6db5d9..494a74f527 100644 --- a/drivers/crypto/starfive/jh7110-cryp.h +++ b/drivers/crypto/starfive/jh7110-cryp.h @@ -91,6 +91,7 @@ union starfive_hash_csr { #define STARFIVE_HASH_KEY_DONE BIT(13) u32 key_done :1; u32 key_flag :1; +#define STARFIVE_HASH_HMAC_DONE BIT(15) u32 hmac_done :1; #define STARFIVE_HASH_BUSY BIT(16) u32 busy :1; @@ -168,6 +169,7 @@ struct starfive_cryp_ctx { struct crypto_akcipher *akcipher_fbk; struct crypto_ahash *ahash_fbk; struct crypto_aead *aead_fbk; + struct crypto_skcipher *skcipher_fbk; }; struct starfive_cryp_dev { @@ -185,11 +187,8 @@ struct starfive_cryp_dev { struct dma_chan *rx; struct dma_slave_config cfg_in; struct dma_slave_config cfg_out; - struct scatter_walk in_walk; - struct scatter_walk out_walk; struct crypto_engine *engine; - struct tasklet_struct aes_done; - struct tasklet_struct hash_done; + struct completion dma_done; size_t assoclen; size_t total_in; size_t total_out; @@ -236,7 +235,4 @@ void starfive_rsa_unregister_algs(void); int starfive_aes_register_algs(void); void starfive_aes_unregister_algs(void); - -void starfive_hash_done_task(unsigned long param); -void starfive_aes_done_task(unsigned long param); #endif diff --git a/drivers/crypto/starfive/jh7110-hash.c b/drivers/crypto/starfive/jh7110-hash.c index 
b6d1808012..2c60a1047b 100644 --- a/drivers/crypto/starfive/jh7110-hash.c +++ b/drivers/crypto/starfive/jh7110-hash.c @@ -36,15 +36,22 @@ #define STARFIVE_HASH_BUFLEN SHA512_BLOCK_SIZE #define STARFIVE_HASH_RESET 0x2 -static inline int starfive_hash_wait_busy(struct starfive_cryp_ctx *ctx) +static inline int starfive_hash_wait_busy(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_dev *cryp = ctx->cryp; u32 status; return readl_relaxed_poll_timeout(cryp->base + STARFIVE_HASH_SHACSR, status, !(status & STARFIVE_HASH_BUSY), 10, 100000); } +static inline int starfive_hash_wait_hmac_done(struct starfive_cryp_dev *cryp) +{ + u32 status; + + return readl_relaxed_poll_timeout(cryp->base + STARFIVE_HASH_SHACSR, status, + (status & STARFIVE_HASH_HMAC_DONE), 10, 100000); +} + static inline int starfive_hash_wait_key_done(struct starfive_cryp_ctx *ctx) { struct starfive_cryp_dev *cryp = ctx->cryp; @@ -84,64 +91,26 @@ static int starfive_hash_hmac_key(struct starfive_cryp_ctx *ctx) return 0; } -static void starfive_hash_start(void *param) +static void starfive_hash_start(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_ctx *ctx = param; - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = ctx->cryp; - union starfive_alg_cr alg_cr; union starfive_hash_csr csr; - u32 stat; - - dma_unmap_sg(cryp->dev, rctx->in_sg, rctx->in_sg_len, DMA_TO_DEVICE); - - alg_cr.v = 0; - alg_cr.clear = 1; - - writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); csr.v = readl(cryp->base + STARFIVE_HASH_SHACSR); csr.firstb = 0; csr.final = 1; - - stat = readl(cryp->base + STARFIVE_IE_MASK_OFFSET); - stat &= ~STARFIVE_IE_MASK_HASH_DONE; - writel(stat, cryp->base + STARFIVE_IE_MASK_OFFSET); writel(csr.v, cryp->base + STARFIVE_HASH_SHACSR); } -static int starfive_hash_xmit_dma(struct starfive_cryp_ctx *ctx) +static void starfive_hash_dma_callback(void *param) { - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = 
ctx->cryp; - struct dma_async_tx_descriptor *in_desc; - union starfive_alg_cr alg_cr; - int total_len; - int ret; - - if (!rctx->total) { - starfive_hash_start(ctx); - return 0; - } + struct starfive_cryp_dev *cryp = param; - writel(rctx->total, cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); - - total_len = rctx->total; - total_len = (total_len & 0x3) ? (((total_len >> 2) + 1) << 2) : total_len; - sg_dma_len(rctx->in_sg) = total_len; - - alg_cr.v = 0; - alg_cr.start = 1; - alg_cr.hash_dma_en = 1; - - writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); - - ret = dma_map_sg(cryp->dev, rctx->in_sg, rctx->in_sg_len, DMA_TO_DEVICE); - if (!ret) - return dev_err_probe(cryp->dev, -EINVAL, "dma_map_sg() error\n"); + complete(&cryp->dma_done); +} - cryp->cfg_in.direction = DMA_MEM_TO_DEV; - cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; +static void starfive_hash_dma_init(struct starfive_cryp_dev *cryp) +{ + cryp->cfg_in.src_addr_width = DMA_SLAVE_BUSWIDTH_16_BYTES; cryp->cfg_in.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; cryp->cfg_in.src_maxburst = cryp->dma_maxburst; cryp->cfg_in.dst_maxburst = cryp->dma_maxburst; @@ -149,50 +118,48 @@ static int starfive_hash_xmit_dma(struct starfive_cryp_ctx *ctx) dmaengine_slave_config(cryp->tx, &cryp->cfg_in); - in_desc = dmaengine_prep_slave_sg(cryp->tx, rctx->in_sg, - ret, DMA_MEM_TO_DEV, - DMA_PREP_INTERRUPT | DMA_CTRL_ACK); - - if (!in_desc) - return -EINVAL; - - in_desc->callback = starfive_hash_start; - in_desc->callback_param = ctx; - - dmaengine_submit(in_desc); - dma_async_issue_pending(cryp->tx); - - return 0; + init_completion(&cryp->dma_done); } -static int starfive_hash_xmit(struct starfive_cryp_ctx *ctx) +static int starfive_hash_dma_xfer(struct starfive_cryp_dev *cryp, + struct scatterlist *sg) { - struct starfive_cryp_request_ctx *rctx = ctx->rctx; - struct starfive_cryp_dev *cryp = ctx->cryp; + struct dma_async_tx_descriptor *in_desc; + union starfive_alg_cr alg_cr; int ret = 0; - rctx->csr.hash.v = 0; - 
rctx->csr.hash.reset = 1; - writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); - - if (starfive_hash_wait_busy(ctx)) - return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error resetting engine.\n"); + alg_cr.v = 0; + alg_cr.start = 1; + alg_cr.hash_dma_en = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); - rctx->csr.hash.v = 0; - rctx->csr.hash.mode = ctx->hash_mode; - rctx->csr.hash.ie = 1; + writel(sg_dma_len(sg), cryp->base + STARFIVE_DMA_IN_LEN_OFFSET); + sg_dma_len(sg) = ALIGN(sg_dma_len(sg), sizeof(u32)); - if (ctx->is_hmac) { - ret = starfive_hash_hmac_key(ctx); - if (ret) - return ret; - } else { - rctx->csr.hash.start = 1; - rctx->csr.hash.firstb = 1; - writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); + in_desc = dmaengine_prep_slave_sg(cryp->tx, sg, 1, DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!in_desc) { + ret = -EINVAL; + goto end; } - return starfive_hash_xmit_dma(ctx); + reinit_completion(&cryp->dma_done); + in_desc->callback = starfive_hash_dma_callback; + in_desc->callback_param = cryp; + + dmaengine_submit(in_desc); + dma_async_issue_pending(cryp->tx); + + if (!wait_for_completion_timeout(&cryp->dma_done, + msecs_to_jiffies(1000))) + ret = -ETIMEDOUT; + +end: + alg_cr.v = 0; + alg_cr.clear = 1; + writel(alg_cr.v, cryp->base + STARFIVE_ALG_CR_OFFSET); + + return ret; } static int starfive_hash_copy_hash(struct ahash_request *req) @@ -215,58 +182,74 @@ static int starfive_hash_copy_hash(struct ahash_request *req) return 0; } -void starfive_hash_done_task(unsigned long param) +static void starfive_hash_done_task(struct starfive_cryp_dev *cryp) { - struct starfive_cryp_dev *cryp = (struct starfive_cryp_dev *)param; int err = cryp->err; if (!err) err = starfive_hash_copy_hash(cryp->req.hreq); - /* Reset to clear hash_done in irq register*/ - writel(STARFIVE_HASH_RESET, cryp->base + STARFIVE_HASH_SHACSR); - crypto_finalize_hash_request(cryp->engine, cryp->req.hreq, err); } -static int 
starfive_hash_check_aligned(struct scatterlist *sg, size_t total, size_t align) +static int starfive_hash_one_request(struct crypto_engine *engine, void *areq) { - int len = 0; + struct ahash_request *req = container_of(areq, struct ahash_request, + base); + struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct starfive_cryp_request_ctx *rctx = ctx->rctx; + struct starfive_cryp_dev *cryp = ctx->cryp; + struct scatterlist *tsg; + int ret, src_nents, i; - if (!total) - return 0; + writel(STARFIVE_HASH_RESET, cryp->base + STARFIVE_HASH_SHACSR); - if (!IS_ALIGNED(total, align)) - return -EINVAL; + if (starfive_hash_wait_busy(cryp)) + return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error resetting hardware\n"); - while (sg) { - if (!IS_ALIGNED(sg->offset, sizeof(u32))) - return -EINVAL; + rctx->csr.hash.v = 0; + rctx->csr.hash.mode = ctx->hash_mode; - if (!IS_ALIGNED(sg->length, align)) - return -EINVAL; + if (ctx->is_hmac) { + ret = starfive_hash_hmac_key(ctx); + if (ret) + return ret; + } else { + rctx->csr.hash.start = 1; + rctx->csr.hash.firstb = 1; + writel(rctx->csr.hash.v, cryp->base + STARFIVE_HASH_SHACSR); + } + + /* No input message, get digest and end. 
*/ + if (!rctx->total) + goto hash_start; + + starfive_hash_dma_init(cryp); + + for_each_sg(rctx->in_sg, tsg, rctx->in_sg_len, i) { + src_nents = dma_map_sg(cryp->dev, tsg, 1, DMA_TO_DEVICE); + if (src_nents == 0) + return dev_err_probe(cryp->dev, -ENOMEM, + "dma_map_sg error\n"); - len += sg->length; - sg = sg_next(sg); + ret = starfive_hash_dma_xfer(cryp, tsg); + dma_unmap_sg(cryp->dev, tsg, 1, DMA_TO_DEVICE); + if (ret) + return ret; } - if (len != total) - return -EINVAL; +hash_start: + starfive_hash_start(cryp); - return 0; -} + if (starfive_hash_wait_busy(cryp)) + return dev_err_probe(cryp->dev, -ETIMEDOUT, "Error generating digest\n"); -static int starfive_hash_one_request(struct crypto_engine *engine, void *areq) -{ - struct ahash_request *req = container_of(areq, struct ahash_request, - base); - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct starfive_cryp_dev *cryp = ctx->cryp; + if (ctx->is_hmac) + cryp->err = starfive_hash_wait_hmac_done(cryp); - if (!cryp) - return -ENODEV; + starfive_hash_done_task(cryp); - return starfive_hash_xmit(ctx); + return 0; } static int starfive_hash_init(struct ahash_request *req) @@ -337,22 +320,6 @@ static int starfive_hash_finup(struct ahash_request *req) return crypto_ahash_finup(&rctx->ahash_fbk_req); } -static int starfive_hash_digest_fb(struct ahash_request *req) -{ - struct starfive_cryp_request_ctx *rctx = ahash_request_ctx(req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(tfm); - - ahash_request_set_tfm(&rctx->ahash_fbk_req, ctx->ahash_fbk); - ahash_request_set_callback(&rctx->ahash_fbk_req, req->base.flags, - req->base.complete, req->base.data); - - ahash_request_set_crypt(&rctx->ahash_fbk_req, req->src, - req->result, req->nbytes); - - return crypto_ahash_digest(&rctx->ahash_fbk_req); -} - static int starfive_hash_digest(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -370,9 
+337,6 @@ static int starfive_hash_digest(struct ahash_request *req) rctx->in_sg_len = sg_nents_for_len(rctx->in_sg, rctx->total); ctx->rctx = rctx; - if (starfive_hash_check_aligned(rctx->in_sg, rctx->total, rctx->blksize)) - return starfive_hash_digest_fb(req); - return crypto_transfer_hash_request_to_engine(cryp->engine, req); } @@ -406,7 +370,8 @@ static int starfive_hash_import(struct ahash_request *req, const void *in) static int starfive_hash_init_tfm(struct crypto_ahash *hash, const char *alg_name, - unsigned int mode) + unsigned int mode, + bool is_hmac) { struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); @@ -426,7 +391,7 @@ static int starfive_hash_init_tfm(struct crypto_ahash *hash, crypto_ahash_set_reqsize(hash, sizeof(struct starfive_cryp_request_ctx) + crypto_ahash_reqsize(ctx->ahash_fbk)); - ctx->keylen = 0; + ctx->is_hmac = is_hmac; ctx->hash_mode = mode; return 0; @@ -529,81 +494,61 @@ static int starfive_hash_setkey(struct crypto_ahash *hash, static int starfive_sha224_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha224-generic", - STARFIVE_HASH_SHA224); + STARFIVE_HASH_SHA224, 0); } static int starfive_sha256_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha256-generic", - STARFIVE_HASH_SHA256); + STARFIVE_HASH_SHA256, 0); } static int starfive_sha384_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha384-generic", - STARFIVE_HASH_SHA384); + STARFIVE_HASH_SHA384, 0); } static int starfive_sha512_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sha512-generic", - STARFIVE_HASH_SHA512); + STARFIVE_HASH_SHA512, 0); } static int starfive_sm3_init_tfm(struct crypto_ahash *hash) { return starfive_hash_init_tfm(hash, "sm3-generic", - STARFIVE_HASH_SM3); + STARFIVE_HASH_SM3, 0); } static int starfive_hmac_sha224_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - 
return starfive_hash_init_tfm(hash, "hmac(sha224-generic)", - STARFIVE_HASH_SHA224); + STARFIVE_HASH_SHA224, 1); } static int starfive_hmac_sha256_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha256-generic)", - STARFIVE_HASH_SHA256); + STARFIVE_HASH_SHA256, 1); } static int starfive_hmac_sha384_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha384-generic)", - STARFIVE_HASH_SHA384); + STARFIVE_HASH_SHA384, 1); } static int starfive_hmac_sha512_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sha512-generic)", - STARFIVE_HASH_SHA512); + STARFIVE_HASH_SHA512, 1); } static int starfive_hmac_sm3_init_tfm(struct crypto_ahash *hash) { - struct starfive_cryp_ctx *ctx = crypto_ahash_ctx(hash); - - ctx->is_hmac = true; - return starfive_hash_init_tfm(hash, "hmac(sm3-generic)", - STARFIVE_HASH_SM3); + STARFIVE_HASH_SM3, 1); } static struct ahash_engine_alg algs_sha2_sm3[] = { diff --git a/drivers/crypto/starfive/jh7110-rsa.c b/drivers/crypto/starfive/jh7110-rsa.c index cf8bda7f08..33093ba4b1 100644 --- a/drivers/crypto/starfive/jh7110-rsa.c +++ b/drivers/crypto/starfive/jh7110-rsa.c @@ -45,6 +45,9 @@ static inline int starfive_pka_wait_done(struct starfive_cryp_ctx *ctx) static void starfive_rsa_free_key(struct starfive_rsa_key *key) { + if (!key->key_sz) + return; + kfree_sensitive(key->d); kfree_sensitive(key->e); kfree_sensitive(key->n); @@ -273,7 +276,6 @@ static int starfive_rsa_enc_core(struct starfive_cryp_ctx *ctx, int enc) err_rsa_crypt: writel(STARFIVE_RSA_RESET, cryp->base + STARFIVE_PKA_CACR_OFFSET); - kfree(rctx->rsa_data); return ret; } @@ -534,16 +536,14 @@ static int starfive_rsa_init_tfm(struct crypto_akcipher 
*tfm) { struct starfive_cryp_ctx *ctx = akcipher_tfm_ctx(tfm); + ctx->cryp = starfive_cryp_find_dev(ctx); + if (!ctx->cryp) + return -ENODEV; + ctx->akcipher_fbk = crypto_alloc_akcipher("rsa-generic", 0, 0); if (IS_ERR(ctx->akcipher_fbk)) return PTR_ERR(ctx->akcipher_fbk); - ctx->cryp = starfive_cryp_find_dev(ctx); - if (!ctx->cryp) { - crypto_free_akcipher(ctx->akcipher_fbk); - return -ENODEV; - } - akcipher_set_reqsize(tfm, sizeof(struct starfive_cryp_request_ctx) + sizeof(struct crypto_akcipher) + 32); diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c index 34e0d7e381..351827372e 100644 --- a/drivers/crypto/stm32/stm32-hash.c +++ b/drivers/crypto/stm32/stm32-hash.c @@ -94,6 +94,7 @@ #define HASH_FLAGS_ERRORS BIT(21) #define HASH_FLAGS_EMPTY BIT(22) #define HASH_FLAGS_HMAC BIT(23) +#define HASH_FLAGS_SGS_COPIED BIT(24) #define HASH_OP_UPDATE 1 #define HASH_OP_FINAL 2 @@ -145,7 +146,7 @@ struct stm32_hash_state { u16 bufcnt; u16 blocklen; - u8 buffer[HASH_BUFLEN] __aligned(4); + u8 buffer[HASH_BUFLEN] __aligned(sizeof(u32)); /* hash state */ u32 hw_context[3 + HASH_CSR_NB_MAX]; @@ -158,8 +159,8 @@ struct stm32_hash_request_ctx { u8 digest[SHA512_DIGEST_SIZE] __aligned(sizeof(u32)); size_t digcnt; - /* DMA */ struct scatterlist *sg; + struct scatterlist sgl[2]; /* scatterlist used to realize alignment */ unsigned int offset; unsigned int total; struct scatterlist sg_key; @@ -184,6 +185,7 @@ struct stm32_hash_pdata { size_t algs_info_size; bool has_sr; bool has_mdmat; + bool context_secured; bool broken_emptymsg; bool ux500; }; @@ -195,6 +197,7 @@ struct stm32_hash_dev { struct reset_control *rst; void __iomem *io_base; phys_addr_t phys_base; + u8 xmit_buf[HASH_BUFLEN] __aligned(sizeof(u32)); u32 dma_mode; bool polled; @@ -220,6 +223,8 @@ static struct stm32_hash_drv stm32_hash = { }; static void stm32_hash_dma_callback(void *param); +static int stm32_hash_prepare_request(struct ahash_request *req); +static void 
stm32_hash_unprepare_request(struct ahash_request *req); static inline u32 stm32_hash_read(struct stm32_hash_dev *hdev, u32 offset) { @@ -232,6 +237,11 @@ static inline void stm32_hash_write(struct stm32_hash_dev *hdev, writel_relaxed(value, hdev->io_base + offset); } +/** + * stm32_hash_wait_busy - wait until hash processor is available. It return an + * error if the hash core is processing a block of data for more than 10 ms. + * @hdev: the stm32_hash_dev device. + */ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) { u32 status; @@ -245,6 +255,11 @@ static inline int stm32_hash_wait_busy(struct stm32_hash_dev *hdev) !(status & HASH_SR_BUSY), 10, 10000); } +/** + * stm32_hash_set_nblw - set the number of valid bytes in the last word. + * @hdev: the stm32_hash_dev device. + * @length: the length of the final word. + */ static void stm32_hash_set_nblw(struct stm32_hash_dev *hdev, int length) { u32 reg; @@ -282,6 +297,11 @@ static int stm32_hash_write_key(struct stm32_hash_dev *hdev) return 0; } +/** + * stm32_hash_write_ctrl - Initialize the hash processor, only if + * HASH_FLAGS_INIT is set. 
+ * @hdev: the stm32_hash_dev device + */ static void stm32_hash_write_ctrl(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); @@ -469,9 +489,7 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct stm32_hash_state *state = &rctx->state; - u32 *preg = state->hw_context; int bufcnt, err = 0, final; - int i, swap_reg; dev_dbg(hdev->dev, "%s flags %x\n", __func__, state->flags); @@ -495,34 +513,23 @@ static int stm32_hash_update_cpu(struct stm32_hash_dev *hdev) return stm32_hash_xmit_cpu(hdev, state->buffer, bufcnt, 1); } - if (!(hdev->flags & HASH_FLAGS_INIT)) - return 0; - - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - - swap_reg = hash_swap_reg(rctx); - - if (!hdev->pdata->ux500) - *preg++ = stm32_hash_read(hdev, HASH_IMR); - *preg++ = stm32_hash_read(hdev, HASH_STR); - *preg++ = stm32_hash_read(hdev, HASH_CR); - for (i = 0; i < swap_reg; i++) - *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); - - state->flags |= HASH_FLAGS_INIT; - return err; } static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, - struct scatterlist *sg, int length, int mdma) + struct scatterlist *sg, int length, int mdmat) { struct dma_async_tx_descriptor *in_desc; dma_cookie_t cookie; u32 reg; int err; + dev_dbg(hdev->dev, "%s mdmat: %x length: %d\n", __func__, mdmat, length); + + /* do not use dma if there is no data to send */ + if (length <= 0) + return 0; + in_desc = dmaengine_prep_slave_sg(hdev->dma_lch, sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT | DMA_CTRL_ACK); @@ -535,13 +542,12 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, in_desc->callback = stm32_hash_dma_callback; in_desc->callback_param = hdev; - hdev->flags |= HASH_FLAGS_FINAL; hdev->flags |= HASH_FLAGS_DMA_ACTIVE; reg = stm32_hash_read(hdev, HASH_CR); if (hdev->pdata->has_mdmat) { - if (mdma) + if (mdmat) reg |= HASH_CR_MDMAT; else reg &= ~HASH_CR_MDMAT; @@ -550,7 
+556,6 @@ static int stm32_hash_xmit_dma(struct stm32_hash_dev *hdev, stm32_hash_write(hdev, HASH_CR, reg); - stm32_hash_set_nblw(hdev, length); cookie = dmaengine_submit(in_desc); err = dma_submit_error(cookie); @@ -590,7 +595,7 @@ static int stm32_hash_hmac_dma_send(struct stm32_hash_dev *hdev) struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); int err; - if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode == 1) { + if (ctx->keylen < rctx->state.blocklen || hdev->dma_mode > 0) { err = stm32_hash_write_key(hdev); if (stm32_hash_wait_busy(hdev)) return -ETIMEDOUT; @@ -655,18 +660,20 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) struct scatterlist sg[1], *tsg; int err = 0, reg, ncp = 0; unsigned int i, len = 0, bufcnt = 0; + bool final = hdev->flags & HASH_FLAGS_FINAL; bool is_last = false; + u32 last_word; - rctx->sg = hdev->req->src; - rctx->total = hdev->req->nbytes; + dev_dbg(hdev->dev, "%s total: %d bufcnt: %d final: %d\n", + __func__, rctx->total, rctx->state.bufcnt, final); - rctx->nents = sg_nents(rctx->sg); if (rctx->nents < 0) return -EINVAL; stm32_hash_write_ctrl(hdev); - if (hdev->flags & HASH_FLAGS_HMAC) { + if (hdev->flags & HASH_FLAGS_HMAC && (!(hdev->flags & HASH_FLAGS_HMAC_KEY))) { + hdev->flags |= HASH_FLAGS_HMAC_KEY; err = stm32_hash_hmac_dma_send(hdev); if (err != -EINPROGRESS) return err; @@ -677,22 +684,36 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) len = sg->length; if (sg_is_last(sg) || (bufcnt + sg[0].length) >= rctx->total) { - sg->length = rctx->total - bufcnt; - is_last = true; - if (hdev->dma_mode == 1) { - len = (ALIGN(sg->length, 16) - 16); - - ncp = sg_pcopy_to_buffer( - rctx->sg, rctx->nents, - rctx->state.buffer, sg->length - len, - rctx->total - sg->length + len); - - sg->length = len; + if (!final) { + /* Always manually put the last word of a non-final transfer. 
*/ + len -= sizeof(u32); + sg_pcopy_to_buffer(rctx->sg, rctx->nents, &last_word, 4, len); + sg->length -= sizeof(u32); } else { - if (!(IS_ALIGNED(sg->length, sizeof(u32)))) { - len = sg->length; - sg->length = ALIGN(sg->length, - sizeof(u32)); + /* + * In Multiple DMA mode, DMA must be aborted before the final + * transfer. + */ + sg->length = rctx->total - bufcnt; + if (hdev->dma_mode > 0) { + len = (ALIGN(sg->length, 16) - 16); + + ncp = sg_pcopy_to_buffer(rctx->sg, rctx->nents, + rctx->state.buffer, + sg->length - len, + rctx->total - sg->length + len); + + if (!len) + break; + + sg->length = len; + } else { + is_last = true; + if (!(IS_ALIGNED(sg->length, sizeof(u32)))) { + len = sg->length; + sg->length = ALIGN(sg->length, + sizeof(u32)); + } } } } @@ -706,43 +727,67 @@ static int stm32_hash_dma_send(struct stm32_hash_dev *hdev) err = stm32_hash_xmit_dma(hdev, sg, len, !is_last); + /* The last word of a non final transfer is sent manually. */ + if (!final) { + stm32_hash_write(hdev, HASH_DIN, last_word); + len += sizeof(u32); + } + + rctx->total -= len; + bufcnt += sg[0].length; dma_unmap_sg(hdev->dev, sg, 1, DMA_TO_DEVICE); - if (err == -ENOMEM) + if (err == -ENOMEM || err == -ETIMEDOUT) return err; if (is_last) break; } - if (hdev->dma_mode == 1) { - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - reg = stm32_hash_read(hdev, HASH_CR); - reg &= ~HASH_CR_DMAE; - reg |= HASH_CR_DMAA; - stm32_hash_write(hdev, HASH_CR, reg); + /* + * When the second last block transfer of 4 words is performed by the DMA, + * the software must set the DMA Abort bit (DMAA) to 1 before completing the + * last transfer of 4 words or less. 
+ */ + if (final) { + if (hdev->dma_mode > 0) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + reg = stm32_hash_read(hdev, HASH_CR); + reg &= ~HASH_CR_DMAE; + reg |= HASH_CR_DMAA; + stm32_hash_write(hdev, HASH_CR, reg); + + if (ncp) { + memset(buffer + ncp, 0, 4 - DIV_ROUND_UP(ncp, sizeof(u32))); + writesl(hdev->io_base + HASH_DIN, buffer, + DIV_ROUND_UP(ncp, sizeof(u32))); + } - if (ncp) { - memset(buffer + ncp, 0, - DIV_ROUND_UP(ncp, sizeof(u32)) - ncp); - writesl(hdev->io_base + HASH_DIN, buffer, - DIV_ROUND_UP(ncp, sizeof(u32))); + stm32_hash_set_nblw(hdev, ncp); + reg = stm32_hash_read(hdev, HASH_STR); + reg |= HASH_STR_DCAL; + stm32_hash_write(hdev, HASH_STR, reg); + err = -EINPROGRESS; } - stm32_hash_set_nblw(hdev, ncp); - reg = stm32_hash_read(hdev, HASH_STR); - reg |= HASH_STR_DCAL; - stm32_hash_write(hdev, HASH_STR, reg); - err = -EINPROGRESS; - } - if (hdev->flags & HASH_FLAGS_HMAC) { - if (stm32_hash_wait_busy(hdev)) - return -ETIMEDOUT; - err = stm32_hash_hmac_dma_send(hdev); + /* + * The hash processor needs the key to be loaded a second time in order + * to process the HMAC. 
+ */ + if (hdev->flags & HASH_FLAGS_HMAC) { + if (stm32_hash_wait_busy(hdev)) + return -ETIMEDOUT; + err = stm32_hash_hmac_dma_send(hdev); + } + + return err; } - return err; + if (err != -EINPROGRESS) + return err; + + return 0; } static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx) @@ -765,33 +810,6 @@ static struct stm32_hash_dev *stm32_hash_find_dev(struct stm32_hash_ctx *ctx) return hdev; } -static bool stm32_hash_dma_aligned_data(struct ahash_request *req) -{ - struct scatterlist *sg; - struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); - struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); - int i; - - if (!hdev->dma_lch || req->nbytes <= rctx->state.blocklen) - return false; - - if (sg_nents(req->src) > 1) { - if (hdev->dma_mode == 1) - return false; - for_each_sg(req->src, sg, sg_nents(req->src), i) { - if ((!IS_ALIGNED(sg->length, sizeof(u32))) && - (!sg_is_last(sg))) - return false; - } - } - - if (req->src->offset % 4) - return false; - - return true; -} - static int stm32_hash_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -802,8 +820,10 @@ static int stm32_hash_init(struct ahash_request *req) bool sha3_mode = ctx->flags & HASH_FLAGS_SHA3_MODE; rctx->hdev = hdev; + state->flags = 0; - state->flags = HASH_FLAGS_CPU; + if (!(hdev->dma_lch && hdev->pdata->has_mdmat)) + state->flags |= HASH_FLAGS_CPU; if (sha3_mode) state->flags |= HASH_FLAGS_SHA3_MODE; @@ -857,6 +877,7 @@ static int stm32_hash_init(struct ahash_request *req) dev_err(hdev->dev, "Error, block too large"); return -EINVAL; } + rctx->nents = 0; rctx->total = 0; rctx->offset = 0; rctx->data_type = HASH_DATA_8_BITS; @@ -874,6 +895,9 @@ static int stm32_hash_update_req(struct stm32_hash_dev *hdev) struct stm32_hash_request_ctx *rctx = ahash_request_ctx(hdev->req); struct stm32_hash_state *state = &rctx->state; + dev_dbg(hdev->dev, 
"update_req: total: %u, digcnt: %zd, final: 0", + rctx->total, rctx->digcnt); + if (!(state->flags & HASH_FLAGS_CPU)) return stm32_hash_dma_send(hdev); @@ -887,6 +911,11 @@ static int stm32_hash_final_req(struct stm32_hash_dev *hdev) struct stm32_hash_state *state = &rctx->state; int buflen = state->bufcnt; + if (!(state->flags & HASH_FLAGS_CPU)) { + hdev->flags |= HASH_FLAGS_FINAL; + return stm32_hash_dma_send(hdev); + } + if (state->flags & HASH_FLAGS_FINUP) return stm32_hash_update_req(hdev); @@ -968,15 +997,21 @@ static int stm32_hash_finish(struct ahash_request *req) static void stm32_hash_finish_req(struct ahash_request *req, int err) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_state *state = &rctx->state; struct stm32_hash_dev *hdev = rctx->hdev; + if (hdev->flags & HASH_FLAGS_DMA_ACTIVE) + state->flags |= HASH_FLAGS_DMA_ACTIVE; + else + state->flags &= ~HASH_FLAGS_DMA_ACTIVE; + if (!err && (HASH_FLAGS_FINAL & hdev->flags)) { stm32_hash_copy_hash(req); err = stm32_hash_finish(req); } - pm_runtime_mark_last_busy(hdev->dev); - pm_runtime_put_autosuspend(hdev->dev); + /* Finalized request mist be unprepared here */ + stm32_hash_unprepare_request(req); crypto_finalize_hash_request(hdev->engine, req, err); } @@ -1006,6 +1041,10 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) pm_runtime_get_sync(hdev->dev); + err = stm32_hash_prepare_request(req); + if (err) + return err; + hdev->req = req; hdev->flags = 0; swap_reg = hash_swap_reg(rctx); @@ -1030,6 +1069,12 @@ static int stm32_hash_one_request(struct crypto_engine *engine, void *areq) if (state->flags & HASH_FLAGS_HMAC) hdev->flags |= HASH_FLAGS_HMAC | HASH_FLAGS_HMAC_KEY; + + if (state->flags & HASH_FLAGS_CPU) + hdev->flags |= HASH_FLAGS_CPU; + + if (state->flags & HASH_FLAGS_DMA_ACTIVE) + hdev->flags |= HASH_FLAGS_DMA_ACTIVE; } if (rctx->op == HASH_OP_UPDATE) @@ -1054,6 +1099,284 @@ static int stm32_hash_one_request(struct crypto_engine 
*engine, void *areq) return 0; } +static int stm32_hash_copy_sgs(struct stm32_hash_request_ctx *rctx, + struct scatterlist *sg, int bs, + unsigned int new_len) +{ + struct stm32_hash_state *state = &rctx->state; + int pages; + void *buf; + + pages = get_order(new_len); + + buf = (void *)__get_free_pages(GFP_ATOMIC, pages); + if (!buf) { + pr_err("Couldn't allocate pages for unaligned cases.\n"); + return -ENOMEM; + } + + if (state->bufcnt) + memcpy(buf, rctx->hdev->xmit_buf, state->bufcnt); + + scatterwalk_map_and_copy(buf + state->bufcnt, sg, rctx->offset, + min(new_len, rctx->total) - state->bufcnt, 0); + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, buf, new_len); + rctx->sg = rctx->sgl; + state->flags |= HASH_FLAGS_SGS_COPIED; + rctx->nents = 1; + rctx->offset += new_len - state->bufcnt; + state->bufcnt = 0; + rctx->total = new_len; + + return 0; +} + +static int stm32_hash_align_sgs(struct scatterlist *sg, + int nbytes, int bs, bool init, bool final, + struct stm32_hash_request_ctx *rctx) +{ + struct stm32_hash_state *state = &rctx->state; + struct stm32_hash_dev *hdev = rctx->hdev; + struct scatterlist *sg_tmp = sg; + int offset = rctx->offset; + int new_len; + int n = 0; + int bufcnt = state->bufcnt; + bool secure_ctx = hdev->pdata->context_secured; + bool aligned = true; + + if (!sg || !sg->length || !nbytes) { + if (bufcnt) { + bufcnt = DIV_ROUND_UP(bufcnt, bs) * bs; + sg_init_table(rctx->sgl, 1); + sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, bufcnt); + rctx->sg = rctx->sgl; + rctx->nents = 1; + } + + return 0; + } + + new_len = nbytes; + + if (offset) + aligned = false; + + if (final) { + new_len = DIV_ROUND_UP(new_len, bs) * bs; + } else { + new_len = (new_len - 1) / bs * bs; // return n block - 1 block + + /* + * Context save in some version of HASH IP can only be done when the + * FIFO is ready to get a new block. This implies to send n block plus a + * 32 bit word in the first DMA send. 
+ */ + if (init && secure_ctx) { + new_len += sizeof(u32); + if (unlikely(new_len > nbytes)) + new_len -= bs; + } + } + + if (!new_len) + return 0; + + if (nbytes != new_len) + aligned = false; + + while (nbytes > 0 && sg_tmp) { + n++; + + if (bufcnt) { + if (!IS_ALIGNED(bufcnt, bs)) { + aligned = false; + break; + } + nbytes -= bufcnt; + bufcnt = 0; + if (!nbytes) + aligned = false; + + continue; + } + + if (offset < sg_tmp->length) { + if (!IS_ALIGNED(offset + sg_tmp->offset, 4)) { + aligned = false; + break; + } + + if (!IS_ALIGNED(sg_tmp->length - offset, bs)) { + aligned = false; + break; + } + } + + if (offset) { + offset -= sg_tmp->length; + if (offset < 0) { + nbytes += offset; + offset = 0; + } + } else { + nbytes -= sg_tmp->length; + } + + sg_tmp = sg_next(sg_tmp); + + if (nbytes < 0) { + aligned = false; + break; + } + } + + if (!aligned) + return stm32_hash_copy_sgs(rctx, sg, bs, new_len); + + rctx->total = new_len; + rctx->offset += new_len; + rctx->nents = n; + if (state->bufcnt) { + sg_init_table(rctx->sgl, 2); + sg_set_buf(rctx->sgl, rctx->hdev->xmit_buf, state->bufcnt); + sg_chain(rctx->sgl, 2, sg); + rctx->sg = rctx->sgl; + } else { + rctx->sg = sg; + } + + return 0; +} + +static int stm32_hash_prepare_request(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + struct stm32_hash_state *state = &rctx->state; + unsigned int nbytes; + int ret, hash_later, bs; + bool update = rctx->op & HASH_OP_UPDATE; + bool init = !(state->flags & HASH_FLAGS_INIT); + bool finup = state->flags & HASH_FLAGS_FINUP; + bool final = state->flags & HASH_FLAGS_FINAL; + + if (!hdev->dma_lch || state->flags & HASH_FLAGS_CPU) + return 0; + + bs = crypto_ahash_blocksize(tfm); + + nbytes = state->bufcnt; + + /* + * In case of update request nbytes must correspond to the 
content of the + * buffer + the offset minus the content of the request already in the + * buffer. + */ + if (update || finup) + nbytes += req->nbytes - rctx->offset; + + dev_dbg(hdev->dev, + "%s: nbytes=%d, bs=%d, total=%d, offset=%d, bufcnt=%d\n", + __func__, nbytes, bs, rctx->total, rctx->offset, state->bufcnt); + + if (!nbytes) + return 0; + + rctx->total = nbytes; + + if (update && req->nbytes && (!IS_ALIGNED(state->bufcnt, bs))) { + int len = bs - state->bufcnt % bs; + + if (len > req->nbytes) + len = req->nbytes; + scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src, + 0, len, 0); + state->bufcnt += len; + rctx->offset = len; + } + + /* copy buffer in a temporary one that is used for sg alignment */ + if (state->bufcnt) + memcpy(hdev->xmit_buf, state->buffer, state->bufcnt); + + ret = stm32_hash_align_sgs(req->src, nbytes, bs, init, final, rctx); + if (ret) + return ret; + + hash_later = nbytes - rctx->total; + if (hash_later < 0) + hash_later = 0; + + if (hash_later && hash_later <= state->blocklen) { + scatterwalk_map_and_copy(state->buffer, + req->src, + req->nbytes - hash_later, + hash_later, 0); + + state->bufcnt = hash_later; + } else { + state->bufcnt = 0; + } + + if (hash_later > state->blocklen) { + /* FIXME: add support of this case */ + pr_err("Buffer contains more than one block.\n"); + return -ENOMEM; + } + + rctx->total = min(nbytes, rctx->total); + + return 0; +} + +static void stm32_hash_unprepare_request(struct ahash_request *req) +{ + struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); + struct stm32_hash_state *state = &rctx->state; + struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); + u32 *preg = state->hw_context; + int swap_reg, i; + + if (hdev->dma_lch) + dmaengine_terminate_sync(hdev->dma_lch); + + if (state->flags & HASH_FLAGS_SGS_COPIED) + free_pages((unsigned long)sg_virt(rctx->sg), get_order(rctx->sg->length)); + + rctx->sg = 
NULL; + rctx->offset = 0; + + state->flags &= ~(HASH_FLAGS_SGS_COPIED); + + if (!(hdev->flags & HASH_FLAGS_INIT)) + goto pm_runtime; + + state->flags |= HASH_FLAGS_INIT; + + if (stm32_hash_wait_busy(hdev)) { + dev_warn(hdev->dev, "Wait busy failed."); + return; + } + + swap_reg = hash_swap_reg(rctx); + + if (!hdev->pdata->ux500) + *preg++ = stm32_hash_read(hdev, HASH_IMR); + *preg++ = stm32_hash_read(hdev, HASH_STR); + *preg++ = stm32_hash_read(hdev, HASH_CR); + for (i = 0; i < swap_reg; i++) + *preg++ = stm32_hash_read(hdev, HASH_CSR(i)); + +pm_runtime: + pm_runtime_mark_last_busy(hdev->dev); + pm_runtime_put_autosuspend(hdev->dev); +} + static int stm32_hash_enqueue(struct ahash_request *req, unsigned int op) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); @@ -1070,16 +1393,26 @@ static int stm32_hash_update(struct ahash_request *req) struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); struct stm32_hash_state *state = &rctx->state; - if (!req->nbytes || !(state->flags & HASH_FLAGS_CPU)) + if (!req->nbytes) return 0; - rctx->total = req->nbytes; - rctx->sg = req->src; - rctx->offset = 0; - if ((state->bufcnt + rctx->total < state->blocklen)) { - stm32_hash_append_sg(rctx); - return 0; + if (state->flags & HASH_FLAGS_CPU) { + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + + if ((state->bufcnt + rctx->total < state->blocklen)) { + stm32_hash_append_sg(rctx); + return 0; + } + } else { /* DMA mode */ + if (state->bufcnt + req->nbytes <= state->blocklen) { + scatterwalk_map_and_copy(state->buffer + state->bufcnt, req->src, + 0, req->nbytes, 0); + state->bufcnt += req->nbytes; + return 0; + } } return stm32_hash_enqueue(req, HASH_OP_UPDATE); @@ -1098,20 +1431,18 @@ static int stm32_hash_final(struct ahash_request *req) static int stm32_hash_finup(struct ahash_request *req) { struct stm32_hash_request_ctx *rctx = ahash_request_ctx(req); - struct stm32_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct 
stm32_hash_dev *hdev = stm32_hash_find_dev(ctx); struct stm32_hash_state *state = &rctx->state; if (!req->nbytes) goto out; state->flags |= HASH_FLAGS_FINUP; - rctx->total = req->nbytes; - rctx->sg = req->src; - rctx->offset = 0; - if (hdev->dma_lch && stm32_hash_dma_aligned_data(req)) - state->flags &= ~HASH_FLAGS_CPU; + if ((state->flags & HASH_FLAGS_CPU)) { + rctx->total = req->nbytes; + rctx->sg = req->src; + rctx->offset = 0; + } out: return stm32_hash_final(req); @@ -1215,7 +1546,6 @@ static int stm32_hash_cra_sha3_hmac_init(struct crypto_tfm *tfm) HASH_FLAGS_HMAC); } - static void stm32_hash_cra_exit(struct crypto_tfm *tfm) { struct stm32_hash_ctx *ctx = crypto_tfm_ctx(tfm); @@ -1228,14 +1558,9 @@ static irqreturn_t stm32_hash_irq_thread(int irq, void *dev_id) { struct stm32_hash_dev *hdev = dev_id; - if (HASH_FLAGS_CPU & hdev->flags) { - if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { - hdev->flags &= ~HASH_FLAGS_OUTPUT_READY; - goto finish; - } - } else if (HASH_FLAGS_DMA_ACTIVE & hdev->flags) { - hdev->flags &= ~HASH_FLAGS_DMA_ACTIVE; - goto finish; + if (HASH_FLAGS_OUTPUT_READY & hdev->flags) { + hdev->flags &= ~HASH_FLAGS_OUTPUT_READY; + goto finish; } return IRQ_HANDLED; @@ -1984,6 +2309,7 @@ static const struct stm32_hash_pdata stm32_hash_pdata_stm32mp13 = { .algs_info_size = ARRAY_SIZE(stm32_hash_algs_info_stm32mp13), .has_sr = true, .has_mdmat = true, + .context_secured = true, }; static const struct of_device_id stm32_hash_of_match[] = { diff --git a/drivers/crypto/tegra/Makefile b/drivers/crypto/tegra/Makefile new file mode 100644 index 0000000000..a32001e58e --- /dev/null +++ b/drivers/crypto/tegra/Makefile @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +tegra-se-objs := tegra-se-key.o tegra-se-main.o + +tegra-se-y += tegra-se-aes.o +tegra-se-y += tegra-se-hash.o + +obj-$(CONFIG_CRYPTO_DEV_TEGRA) += tegra-se.o diff --git a/drivers/crypto/tegra/tegra-se-aes.c b/drivers/crypto/tegra/tegra-se-aes.c new file mode 100644 index 0000000000..ae7a0f8435 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-aes.c @@ -0,0 +1,1933 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver to handle block cipher algorithms using NVIDIA Security Engine. + */ + +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <crypto/aead.h> +#include <crypto/aes.h> +#include <crypto/engine.h> +#include <crypto/gcm.h> +#include <crypto/scatterwalk.h> +#include <crypto/xts.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/skcipher.h> + +#include "tegra-se.h" + +struct tegra_aes_ctx { + struct tegra_se *se; + u32 alg; + u32 ivsize; + u32 key1_id; + u32 key2_id; +}; + +struct tegra_aes_reqctx { + struct tegra_se_datbuf datbuf; + bool encrypt; + u32 config; + u32 crypto_config; + u32 len; + u32 *iv; +}; + +struct tegra_aead_ctx { + struct tegra_se *se; + unsigned int authsize; + u32 alg; + u32 keylen; + u32 key_id; +}; + +struct tegra_aead_reqctx { + struct tegra_se_datbuf inbuf; + struct tegra_se_datbuf outbuf; + struct scatterlist *src_sg; + struct scatterlist *dst_sg; + unsigned int assoclen; + unsigned int cryptlen; + unsigned int authsize; + bool encrypt; + u32 config; + u32 crypto_config; + u32 key_id; + u32 iv[4]; + u8 authdata[16]; +}; + +struct tegra_cmac_ctx { + struct tegra_se *se; + unsigned int alg; + u32 key_id; + struct crypto_shash *fallback_tfm; +}; + +struct tegra_cmac_reqctx { + struct scatterlist *src_sg; + struct tegra_se_datbuf datbuf; + struct tegra_se_datbuf 
residue; + unsigned int total_len; + unsigned int blk_size; + unsigned int task; + u32 crypto_config; + u32 config; + u32 key_id; + u32 *iv; + u32 result[CMAC_RESULT_REG_COUNT]; +}; + +/* increment counter (128-bit int) */ +static void ctr_iv_inc(__u8 *counter, __u8 bits, __u32 nums) +{ + do { + --bits; + nums += counter[bits]; + counter[bits] = nums & 0xff; + nums >>= 8; + } while (bits && nums); +} + +static void tegra_cbc_iv_copyback(struct skcipher_request *req, struct tegra_aes_ctx *ctx) +{ + struct tegra_aes_reqctx *rctx = skcipher_request_ctx(req); + unsigned int offset; + + offset = req->cryptlen - ctx->ivsize; + + if (rctx->encrypt) + memcpy(req->iv, rctx->datbuf.buf + offset, ctx->ivsize); + else + scatterwalk_map_and_copy(req->iv, req->src, offset, ctx->ivsize, 0); +} + +static void tegra_aes_update_iv(struct skcipher_request *req, struct tegra_aes_ctx *ctx) +{ + int num; + + if (ctx->alg == SE_ALG_CBC) { + tegra_cbc_iv_copyback(req, ctx); + } else if (ctx->alg == SE_ALG_CTR) { + num = req->cryptlen / ctx->ivsize; + if (req->cryptlen % ctx->ivsize) + num++; + + ctr_iv_inc(req->iv, ctx->ivsize, num); + } +} + +static int tegra234_aes_crypto_cfg(u32 alg, bool encrypt) +{ + switch (alg) { + case SE_ALG_CMAC: + case SE_ALG_GMAC: + case SE_ALG_GCM: + case SE_ALG_GCM_FINAL: + return 0; + case SE_ALG_CBC: + if (encrypt) + return SE_CRYPTO_CFG_CBC_ENCRYPT; + else + return SE_CRYPTO_CFG_CBC_DECRYPT; + case SE_ALG_ECB: + if (encrypt) + return SE_CRYPTO_CFG_ECB_ENCRYPT; + else + return SE_CRYPTO_CFG_ECB_DECRYPT; + case SE_ALG_XTS: + if (encrypt) + return SE_CRYPTO_CFG_XTS_ENCRYPT; + else + return SE_CRYPTO_CFG_XTS_DECRYPT; + + case SE_ALG_CTR: + return SE_CRYPTO_CFG_CTR; + case SE_ALG_CBC_MAC: + return SE_CRYPTO_CFG_CBC_MAC; + + default: + break; + } + + return -EINVAL; +} + +static int tegra234_aes_cfg(u32 alg, bool encrypt) +{ + switch (alg) { + case SE_ALG_CBC: + case SE_ALG_ECB: + case SE_ALG_XTS: + case SE_ALG_CTR: + if (encrypt) + return SE_CFG_AES_ENCRYPT; + 
else + return SE_CFG_AES_DECRYPT; + + case SE_ALG_GMAC: + if (encrypt) + return SE_CFG_GMAC_ENCRYPT; + else + return SE_CFG_GMAC_DECRYPT; + + case SE_ALG_GCM: + if (encrypt) + return SE_CFG_GCM_ENCRYPT; + else + return SE_CFG_GCM_DECRYPT; + + case SE_ALG_GCM_FINAL: + if (encrypt) + return SE_CFG_GCM_FINAL_ENCRYPT; + else + return SE_CFG_GCM_FINAL_DECRYPT; + + case SE_ALG_CMAC: + return SE_CFG_CMAC; + + case SE_ALG_CBC_MAC: + return SE_AES_ENC_ALG_AES_ENC | + SE_AES_DST_HASH_REG; + } + return -EINVAL; +} + +static unsigned int tegra_aes_prep_cmd(struct tegra_aes_ctx *ctx, + struct tegra_aes_reqctx *rctx) +{ + unsigned int data_count, res_bits, i = 0, j; + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + dma_addr_t addr = rctx->datbuf.addr; + + data_count = rctx->len / AES_BLOCK_SIZE; + res_bits = (rctx->len % AES_BLOCK_SIZE) * 8; + + /* + * Hardware processes data_count + 1 blocks. + * Reduce 1 block if there is no residue + */ + if (!res_bits) + data_count--; + + if (rctx->iv) { + cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr); + for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++) + cpuvaddr[i++] = rctx->iv[j]; + } + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1); + cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) | + SE_LAST_BLOCK_RES_BITS(res_bits); + + cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6); + cpuvaddr[i++] = rctx->config; + cpuvaddr[i++] = rctx->crypto_config; + + /* Source address setting */ + cpuvaddr[i++] = lower_32_bits(addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(addr)) | SE_ADDR_HI_SZ(rctx->len); + + /* Destination address setting */ + cpuvaddr[i++] = lower_32_bits(addr); + cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(addr)) | + SE_ADDR_HI_SZ(rctx->len); + + cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_LASTBUF | + SE_AES_OP_START; + + 
cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config); + + return i; +} + +static int tegra_aes_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct skcipher_request *req = container_of(areq, struct skcipher_request, base); + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct tegra_aes_reqctx *rctx = skcipher_request_ctx(req); + struct tegra_se *se = ctx->se; + unsigned int cmdlen; + int ret; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_AES_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + return -ENOMEM; + + rctx->datbuf.size = SE_AES_BUFLEN; + rctx->iv = (u32 *)req->iv; + rctx->len = req->cryptlen; + + /* Pad input to AES Block size */ + if (ctx->alg != SE_ALG_XTS) { + if (rctx->len % AES_BLOCK_SIZE) + rctx->len += AES_BLOCK_SIZE - (rctx->len % AES_BLOCK_SIZE); + } + + scatterwalk_map_and_copy(rctx->datbuf.buf, req->src, 0, req->cryptlen, 0); + + /* Prepare the command and submit for execution */ + cmdlen = tegra_aes_prep_cmd(ctx, rctx); + ret = tegra_se_host1x_submit(se, cmdlen); + + /* Copy the result */ + tegra_aes_update_iv(req, ctx); + scatterwalk_map_and_copy(rctx->datbuf.buf, req->dst, 0, req->cryptlen, 1); + + /* Free the buffer */ + dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN, + rctx->datbuf.buf, rctx->datbuf.addr); + + crypto_finalize_skcipher_request(se->engine, req, ret); + + return 0; +} + +static int tegra_aes_cra_init(struct crypto_skcipher *tfm) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + se_alg = container_of(alg, struct tegra_se_alg, alg.skcipher.base); + + crypto_skcipher_set_reqsize(tfm, sizeof(struct tegra_aes_reqctx)); 
+ + ctx->ivsize = crypto_skcipher_ivsize(tfm); + ctx->se = se_alg->se_dev; + ctx->key1_id = 0; + ctx->key2_id = 0; + + algname = crypto_tfm_alg_name(&tfm->base); + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + ctx->alg = ret; + + return 0; +} + +static void tegra_aes_cra_exit(struct crypto_skcipher *tfm) +{ + struct tegra_aes_ctx *ctx = crypto_tfm_ctx(&tfm->base); + + if (ctx->key1_id) + tegra_key_invalidate(ctx->se, ctx->key1_id, ctx->alg); + + if (ctx->key2_id) + tegra_key_invalidate(ctx->se, ctx->key2_id, ctx->alg); +} + +static int tegra_aes_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (aes_check_keylen(keylen)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key1_id); +} + +static int tegra_xts_setkey(struct crypto_skcipher *tfm, + const u8 *key, u32 keylen) +{ + struct tegra_aes_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 len = keylen / 2; + int ret; + + ret = xts_verify_key(tfm, key, keylen); + if (ret || aes_check_keylen(len)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + ret = tegra_key_submit(ctx->se, key, len, + ctx->alg, &ctx->key1_id); + if (ret) + return ret; + + return tegra_key_submit(ctx->se, key + len, len, + ctx->alg, &ctx->key2_id); + + return 0; +} + +static int tegra_aes_kac_manifest(u32 user, u32 alg, u32 keylen) +{ + int manifest; + + manifest = SE_KAC_USER_NS; + + switch (alg) { + case SE_ALG_CBC: + case SE_ALG_ECB: + case SE_ALG_CTR: + manifest |= SE_KAC_ENC; + break; + case SE_ALG_XTS: + manifest |= SE_KAC_XTS; + break; + case SE_ALG_GCM: + manifest |= SE_KAC_GCM; + break; + case SE_ALG_CMAC: + manifest |= SE_KAC_CMAC; + break; + case SE_ALG_CBC_MAC: + manifest |= SE_KAC_ENC; + break; + default: + return -EINVAL; + } + + switch 
(keylen) { + case AES_KEYSIZE_128: + manifest |= SE_KAC_SIZE_128; + break; + case AES_KEYSIZE_192: + manifest |= SE_KAC_SIZE_192; + break; + case AES_KEYSIZE_256: + manifest |= SE_KAC_SIZE_256; + break; + default: + return -EINVAL; + } + + return manifest; +} + +static int tegra_aes_crypt(struct skcipher_request *req, bool encrypt) + +{ + struct crypto_skcipher *tfm; + struct tegra_aes_ctx *ctx; + struct tegra_aes_reqctx *rctx; + + tfm = crypto_skcipher_reqtfm(req); + ctx = crypto_skcipher_ctx(tfm); + rctx = skcipher_request_ctx(req); + + if (ctx->alg != SE_ALG_XTS) { + if (!IS_ALIGNED(req->cryptlen, crypto_skcipher_blocksize(tfm))) { + dev_dbg(ctx->se->dev, "invalid length (%d)", req->cryptlen); + return -EINVAL; + } + } else if (req->cryptlen < XTS_BLOCK_SIZE) { + dev_dbg(ctx->se->dev, "invalid length (%d)", req->cryptlen); + return -EINVAL; + } + + if (!req->cryptlen) + return 0; + + rctx->encrypt = encrypt; + rctx->config = tegra234_aes_cfg(ctx->alg, encrypt); + rctx->crypto_config = tegra234_aes_crypto_cfg(ctx->alg, encrypt); + rctx->crypto_config |= SE_AES_KEY_INDEX(ctx->key1_id); + + if (ctx->key2_id) + rctx->crypto_config |= SE_AES_KEY2_INDEX(ctx->key2_id); + + return crypto_transfer_skcipher_request_to_engine(ctx->se->engine, req); +} + +static int tegra_aes_encrypt(struct skcipher_request *req) +{ + return tegra_aes_crypt(req, true); +} + +static int tegra_aes_decrypt(struct skcipher_request *req) +{ + return tegra_aes_crypt(req, false); +} + +static struct tegra_se_alg tegra_aes_algs[] = { + { + .alg.skcipher.op.do_one_request = tegra_aes_do_one_req, + .alg.skcipher.base = { + .init = tegra_aes_cra_init, + .exit = tegra_aes_cra_exit, + .setkey = tegra_aes_setkey, + .encrypt = tegra_aes_encrypt, + .decrypt = tegra_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .base = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc-aes-tegra", + .cra_priority = 500, + .cra_flags = 
CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+				.cra_blocksize = AES_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct tegra_aes_ctx),
+				.cra_alignmask = 0xf,
+				.cra_module = THIS_MODULE,
+			},
+		}
+	}, {
+		/* ecb(aes): no .ivsize is set for this entry */
+		.alg.skcipher.op.do_one_request	= tegra_aes_do_one_req,
+		.alg.skcipher.base = {
+			.init = tegra_aes_cra_init,
+			.exit = tegra_aes_cra_exit,
+			.setkey = tegra_aes_setkey,
+			.encrypt = tegra_aes_encrypt,
+			.decrypt = tegra_aes_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.base = {
+				.cra_name = "ecb(aes)",
+				.cra_driver_name = "ecb-aes-tegra",
+				.cra_priority = 500,
+				.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+				.cra_blocksize = AES_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct tegra_aes_ctx),
+				.cra_alignmask = 0xf,
+				.cra_module = THIS_MODULE,
+			},
+		}
+	}, {
+		/* ctr(aes): registered with a block size of 1 */
+		.alg.skcipher.op.do_one_request	= tegra_aes_do_one_req,
+		.alg.skcipher.base = {
+			.init = tegra_aes_cra_init,
+			.exit = tegra_aes_cra_exit,
+			.setkey = tegra_aes_setkey,
+			.encrypt = tegra_aes_encrypt,
+			.decrypt = tegra_aes_decrypt,
+			.min_keysize = AES_MIN_KEY_SIZE,
+			.max_keysize = AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			.base = {
+				.cra_name = "ctr(aes)",
+				.cra_driver_name = "ctr-aes-tegra",
+				.cra_priority = 500,
+				.cra_flags = CRYPTO_ALG_TYPE_SKCIPHER | CRYPTO_ALG_ASYNC,
+				.cra_blocksize = 1,
+				.cra_ctxsize = sizeof(struct tegra_aes_ctx),
+				.cra_alignmask = 0xf,
+				.cra_module = THIS_MODULE,
+			},
+		}
+	}, {
+		/*
+		 * xts(aes): uses tegra_xts_setkey and doubled key size limits.
+		 * NOTE(review): unlike the entries above, no .cra_flags is set
+		 * here - confirm whether that is intentional.
+		 */
+		.alg.skcipher.op.do_one_request	= tegra_aes_do_one_req,
+		.alg.skcipher.base = {
+			.init = tegra_aes_cra_init,
+			.exit = tegra_aes_cra_exit,
+			.setkey	= tegra_xts_setkey,
+			.encrypt = tegra_aes_encrypt,
+			.decrypt = tegra_aes_decrypt,
+			.min_keysize = 2 * AES_MIN_KEY_SIZE,
+			.max_keysize = 2 * AES_MAX_KEY_SIZE,
+			.ivsize = AES_BLOCK_SIZE,
+			.base = {
+				.cra_name = "xts(aes)",
+				.cra_driver_name = "xts-aes-tegra",
+				.cra_priority = 500,
+				.cra_blocksize = AES_BLOCK_SIZE,
+				.cra_ctxsize = sizeof(struct tegra_aes_ctx),
+				.cra_alignmask = (__alignof__(u64) - 1),
+				.cra_module = THIS_MODULE,
+			},
+		}
+	},
+};
+
+/*
+ * Write the GMAC command sequence for rctx->assoclen bytes located at
+ * rctx->inbuf.addr into the engine command buffer.
+ *
+ * Returns the number of 32-bit words written.
+ */
+static unsigned int tegra_gmac_prep_cmd(struct tegra_aead_ctx *ctx,
+					struct tegra_aead_reqctx *rctx)
+{
+	unsigned int data_count, res_bits, i = 0;
+	struct tegra_se *se = ctx->se;
+	u32 *cpuvaddr = se->cmdbuf->addr;
+
+	data_count = (rctx->assoclen / AES_BLOCK_SIZE);
+	res_bits = (rctx->assoclen % AES_BLOCK_SIZE) * 8;
+
+	/*
+	 * Hardware processes data_count + 1 blocks.
+	 * Reduce 1 block if there is no residue
+	 */
+	/* The only caller in this file invokes this with assoclen != 0 */
+	if (!res_bits)
+		data_count--;
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+	cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) |
+			SE_LAST_BLOCK_RES_BITS(res_bits);
+
+	/* Four words follow: config, crypto_config and the source address pair */
+	cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 4);
+	cpuvaddr[i++] = rctx->config;
+	cpuvaddr[i++] = rctx->crypto_config;
+	cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) |
+			SE_ADDR_HI_SZ(rctx->assoclen);
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+	cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL |
+			SE_AES_OP_INIT | SE_AES_OP_LASTBUF |
+			SE_AES_OP_START;
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+	cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+			host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+	return i;
+}
+
+/*
+ * Write the GCM encrypt/decrypt command sequence for rctx->cryptlen bytes
+ * (source rctx->inbuf, destination rctx->outbuf) into the engine command
+ * buffer, preceded by the counter words from rctx->iv.
+ *
+ * Returns the number of 32-bit words written.
+ */
+static unsigned int tegra_gcm_crypt_prep_cmd(struct tegra_aead_ctx *ctx,
+					     struct tegra_aead_reqctx *rctx)
+{
+	unsigned int data_count, res_bits, i = 0, j;
+	struct tegra_se *se = ctx->se;
+	u32 *cpuvaddr = se->cmdbuf->addr, op;
+
+	data_count = (rctx->cryptlen / AES_BLOCK_SIZE);
+	res_bits = (rctx->cryptlen % AES_BLOCK_SIZE) * 8;
+	op = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL |
+	     SE_AES_OP_LASTBUF | SE_AES_OP_START;
+
+	/*
+	 * If there is no assoc data,
+	 * this will be the init command
+	 */
+	if (!rctx->assoclen)
+		op |= SE_AES_OP_INIT;
+
+	/*
+	 * Hardware processes data_count + 1 blocks.
+	 * Reduce 1 block if there is no residue
+	 */
+	/* The only caller in this file invokes this with cryptlen != 0 */
+	if (!res_bits)
+		data_count--;
+
+	cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+	cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+	for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+		cpuvaddr[i++] = rctx->iv[j];
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+	cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) |
+			SE_LAST_BLOCK_RES_BITS(res_bits);
+
+	cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+	cpuvaddr[i++] = rctx->config;
+	cpuvaddr[i++] = rctx->crypto_config;
+
+	/* Source Address */
+	cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) |
+			SE_ADDR_HI_SZ(rctx->cryptlen);
+
+	/* Destination Address */
+	cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) |
+			SE_ADDR_HI_SZ(rctx->cryptlen);
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+	cpuvaddr[i++] = op;
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+	cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+			host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+	dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config);
+	return i;
+}
+
+/*
+ * Write the GCM_FINAL command sequence into @cpuvaddr: the AAD and message
+ * lengths in bits, the counter words, and a destination of 0x10 bytes at
+ * rctx->outbuf.addr for the tag.
+ *
+ * Returns the number of 32-bit words written.
+ */
+static int tegra_gcm_prep_final_cmd(struct tegra_se *se, u32 *cpuvaddr,
+				    struct tegra_aead_reqctx *rctx)
+{
+	unsigned int i = 0, j;
+	u32 op;
+
+	op = SE_AES_OP_WRSTALL | SE_AES_OP_FINAL |
+	     SE_AES_OP_LASTBUF | SE_AES_OP_START;
+
+	/*
+	 * Set init for zero sized vector
+	 */
+	if (!rctx->assoclen && !rctx->cryptlen)
+		op |= SE_AES_OP_INIT;
+
+	/* Lengths are written in bits; the second word of each pair is 0 */
+	cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->aad_len, 2);
+	cpuvaddr[i++] = rctx->assoclen * 8;
+	cpuvaddr[i++] = 0;
+
+	cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->cryp_msg_len, 2);
+	cpuvaddr[i++] = rctx->cryptlen * 8;
+	cpuvaddr[i++] = 0;
+
+	cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+	cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+	for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+		cpuvaddr[i++] = rctx->iv[j];
+
+	cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+	cpuvaddr[i++] = rctx->config;
+	cpuvaddr[i++] = rctx->crypto_config;
+	/* The two source address words are written as 0 for this step */
+	cpuvaddr[i++] = 0;
+	cpuvaddr[i++] = 0;
+
+	/* Destination Address */
+	cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) |
+			SE_ADDR_HI_SZ(0x10); /* HW always generates 128-bit tag */
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+	cpuvaddr[i++] = op;
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+	cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+			host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+	dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n", rctx->config, rctx->crypto_config);
+
+	return i;
+}
+
+/*
+ * GMAC step of a GCM request: copy the first rctx->assoclen bytes of the
+ * source scatterlist into rctx->inbuf, build the GMAC command and submit
+ * it. Returns the result of tegra_se_host1x_submit().
+ * NOTE(review): assoclen is not checked against the inbuf size here.
+ */
+static int tegra_gcm_do_gmac(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+	struct tegra_se *se = ctx->se;
+	unsigned int cmdlen;
+
+	scatterwalk_map_and_copy(rctx->inbuf.buf,
+				 rctx->src_sg, 0, rctx->assoclen, 0);
+
+	rctx->config = tegra234_aes_cfg(SE_ALG_GMAC, rctx->encrypt);
+	rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GMAC, rctx->encrypt) |
+			      SE_AES_KEY_INDEX(ctx->key_id);
+
+	cmdlen = tegra_gmac_prep_cmd(ctx, rctx);
+
+	return tegra_se_host1x_submit(se, cmdlen);
+}
+
+/*
+ * Encrypt/decrypt step of a GCM request: copy rctx->cryptlen bytes that
+ * follow the associated data in the source scatterlist into rctx->inbuf,
+ * run the GCM command, then copy rctx->outbuf to the same offset of the
+ * destination scatterlist.
+ * NOTE(review): cryptlen is not checked against the buffer size here.
+ */
+static int tegra_gcm_do_crypt(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+	struct tegra_se *se = ctx->se;
+	int cmdlen, ret;
+
+	scatterwalk_map_and_copy(rctx->inbuf.buf, rctx->src_sg,
+				 rctx->assoclen, rctx->cryptlen, 0);
+
+	rctx->config = tegra234_aes_cfg(SE_ALG_GCM, rctx->encrypt);
+	rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GCM, rctx->encrypt) |
+			      SE_AES_KEY_INDEX(ctx->key_id);
+
+	/* Prepare command and submit */
+	cmdlen = tegra_gcm_crypt_prep_cmd(ctx, rctx);
+	ret =
tegra_se_host1x_submit(se, cmdlen);
+	if (ret)
+		return ret;
+
+	/* Copy the result */
+	scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg,
+				 rctx->assoclen, rctx->cryptlen, 1);
+
+	return 0;
+}
+
+/*
+ * Final step of a GCM request: run the GCM_FINAL command, which places
+ * the tag in rctx->outbuf. On encryption the first rctx->authsize bytes
+ * of the tag are written to the destination scatterlist right after the
+ * ciphertext; on decryption the tag is left in rctx->outbuf.
+ */
+static int tegra_gcm_do_final(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+	struct tegra_se *se = ctx->se;
+	u32 *cpuvaddr = se->cmdbuf->addr;
+	int cmdlen, ret, offset;
+
+	rctx->config = tegra234_aes_cfg(SE_ALG_GCM_FINAL, rctx->encrypt);
+	rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_GCM_FINAL, rctx->encrypt) |
+			      SE_AES_KEY_INDEX(ctx->key_id);
+
+	/* Prepare command and submit */
+	cmdlen = tegra_gcm_prep_final_cmd(se, cpuvaddr, rctx);
+	ret = tegra_se_host1x_submit(se, cmdlen);
+	if (ret)
+		return ret;
+
+	if (rctx->encrypt) {
+		/* Copy the result */
+		offset = rctx->assoclen + rctx->cryptlen;
+		scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg,
+					 offset, rctx->authsize, 1);
+	}
+
+	return 0;
+}
+
+/*
+ * Compare the tag in rctx->outbuf against the rctx->authsize bytes that
+ * follow the ciphertext in the source scatterlist.
+ *
+ * Returns 0 on match and -EBADMSG on mismatch. The comparison uses
+ * crypto_memneq(). @se is not used.
+ */
+static int tegra_gcm_do_verify(struct tegra_se *se, struct tegra_aead_reqctx *rctx)
+{
+	unsigned int offset;
+	u8 mac[16];
+
+	offset = rctx->assoclen + rctx->cryptlen;
+	scatterwalk_map_and_copy(mac, rctx->src_sg, offset, rctx->authsize, 0);
+
+	if (crypto_memneq(rctx->outbuf.buf, mac, rctx->authsize))
+		return -EBADMSG;
+
+	return 0;
+}
+
+/*
+ * Validate the first IV byte of a CCM request.
+ * Returns 0 when iv[0] is in the range 1..7, -EINVAL otherwise.
+ */
+static inline int tegra_ccm_check_iv(const u8 *iv)
+{
+	/* iv[0] gives value of q-1
+	 * 2 <= q <= 8 as per NIST 800-38C notation
+	 * 2 <= L <= 8, so 1 <= L' <= 7. as per rfc 3610 notation
+	 */
+	if (iv[0] < 1 || iv[0] > 7) {
+		pr_debug("ccm_check_iv failed %d\n", iv[0]);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * Write the CBC-MAC command sequence for rctx->inbuf.size bytes of input
+ * into the engine command buffer, with a 0x10-byte destination at
+ * rctx->outbuf.addr.
+ *
+ * Returns the number of 32-bit words written.
+ */
+static unsigned int tegra_cbcmac_prep_cmd(struct tegra_aead_ctx *ctx,
+					  struct tegra_aead_reqctx *rctx)
+{
+	unsigned int data_count, i = 0;
+	struct tegra_se *se = ctx->se;
+	u32 *cpuvaddr = se->cmdbuf->addr;
+
+	/* assumes inbuf.size is a non-zero multiple of AES_BLOCK_SIZE - the caller pads it */
+	data_count = (rctx->inbuf.size / AES_BLOCK_SIZE) - 1;
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+	cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count);
+
+	cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+	cpuvaddr[i++] = rctx->config;
+	cpuvaddr[i++] = rctx->crypto_config;
+
+	cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) |
+			SE_ADDR_HI_SZ(rctx->inbuf.size);
+
+	cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) |
+			SE_ADDR_HI_SZ(0x10); /* HW always generates 128 bit tag */
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+	cpuvaddr[i++] = SE_AES_OP_WRSTALL |
+			SE_AES_OP_LASTBUF | SE_AES_OP_START;
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+	cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+			host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+	return i;
+}
+
+/*
+ * Write the CTR command sequence used by CCM into the engine command
+ * buffer: counter words from rctx->iv, then source (rctx->inbuf) and
+ * destination (rctx->outbuf), both sized rctx->inbuf.size.
+ *
+ * Returns the number of 32-bit words written.
+ */
+static unsigned int tegra_ctr_prep_cmd(struct tegra_aead_ctx *ctx,
+				       struct tegra_aead_reqctx *rctx)
+{
+	unsigned int i = 0, j;
+	struct tegra_se *se = ctx->se;
+	u32 *cpuvaddr = se->cmdbuf->addr;
+
+	cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+	cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+	for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+		cpuvaddr[i++] = rctx->iv[j];
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+	cpuvaddr[i++] = (rctx->inbuf.size / AES_BLOCK_SIZE) - 1;
+	cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+
cpuvaddr[i++] = rctx->config;
+	cpuvaddr[i++] = rctx->crypto_config;
+
+	/* Source address setting */
+	cpuvaddr[i++] = lower_32_bits(rctx->inbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->inbuf.addr)) |
+			SE_ADDR_HI_SZ(rctx->inbuf.size);
+
+	/* Destination address setting */
+	cpuvaddr[i++] = lower_32_bits(rctx->outbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->outbuf.addr)) |
+			SE_ADDR_HI_SZ(rctx->inbuf.size);
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+	cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_LASTBUF |
+			SE_AES_OP_START;
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+	cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+			host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+	dev_dbg(se->dev, "cfg %#x crypto cfg %#x\n",
+		rctx->config, rctx->crypto_config);
+
+	return i;
+}
+
+/*
+ * CBC-MAC step of a CCM request: set up the CBC_MAC config words, build
+ * the command over the already formatted rctx->inbuf and submit it.
+ * Returns the result of tegra_se_host1x_submit().
+ */
+static int tegra_ccm_do_cbcmac(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+	struct tegra_se *se = ctx->se;
+	int cmdlen;
+
+	rctx->config = tegra234_aes_cfg(SE_ALG_CBC_MAC, rctx->encrypt);
+	rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_CBC_MAC,
+						      rctx->encrypt) |
+						      SE_AES_KEY_INDEX(ctx->key_id);
+
+	/* Prepare command and submit */
+	cmdlen = tegra_cbcmac_prep_cmd(ctx, rctx);
+
+	return tegra_se_host1x_submit(se, cmdlen);
+}
+
+/*
+ * Store @msglen big-endian in the @csize-byte field starting at @block.
+ *
+ * The whole field is zeroed first. When @csize is 4 or more only the last
+ * four bytes carry the value. When @csize is below 4 and @msglen exceeds
+ * 1 << (8 * @csize), -EOVERFLOW is returned. Returns 0 on success.
+ */
+static int tegra_ccm_set_msg_len(u8 *block, unsigned int msglen, int csize)
+{
+	__be32 data;
+
+	memset(block, 0, csize);
+	block += csize;
+
+	if (csize >= 4)
+		csize = 4;
+	else if (msglen > (1 << (8 * csize)))
+		return -EOVERFLOW;
+
+	data = cpu_to_be32(msglen);
+	/* copy the low-order csize bytes of the big-endian value to the field end */
+	memcpy(block - csize, (u8 *)&data + 4 - csize, csize);
+
+	return 0;
+}
+
+/*
+ * Build the first 16-byte CCM block in @nonce from rctx->iv: the tag
+ * length and the "associated data present" bit are OR-ed into byte 0, and
+ * rctx->cryptlen is stored in the trailing q = iv[0] + 1 bytes.
+ *
+ * Returns 0 or the error from tegra_ccm_set_msg_len().
+ */
+static int tegra_ccm_format_nonce(struct tegra_aead_reqctx *rctx, u8 *nonce)
+{
+	unsigned int q, t;
+	u8 *q_ptr, *iv = (u8 *)rctx->iv;
+
+	memcpy(nonce, rctx->iv, 16);
+
+	/*** 1. Prepare Flags Octet ***/
+
+	/* Encode t (mac length) */
+	t = rctx->authsize;
+	nonce[0] |= (((t - 2) / 2) << 3);
+
+	/* Adata */
+	if (rctx->assoclen)
+		nonce[0] |= (1 << 6);
+
+	/*** Encode Q - message length ***/
+	q = iv[0] + 1;
+	q_ptr = nonce + 16 - q;
+
+	return tegra_ccm_set_msg_len(q_ptr, rctx->cryptlen, q);
+}
+
+/*
+ * Write the associated-data length header for length @a into @adata.
+ *
+ * Lengths below 65280 use a 2-byte big-endian value; larger lengths use
+ * the 0xfffe marker followed by a 4-byte big-endian value.
+ * Returns the number of header bytes written (2 or 6).
+ */
+static int tegra_ccm_format_adata(u8 *adata, unsigned int a)
+{
+	int len = 0;
+
+	/* add control info for associated data
+	 * RFC 3610 and NIST Special Publication 800-38C
+	 */
+	if (a < 65280) {
+		*(__be16 *)adata = cpu_to_be16(a);
+		len = 2;
+	} else {
+		*(__be16 *)adata = cpu_to_be16(0xfffe);
+		*(__be32 *)&adata[2] = cpu_to_be32(a);
+		len = 6;
+	}
+
+	return len;
+}
+
+/*
+ * Zero-fill @buf so that @len bytes of preceding data are padded up to the
+ * next multiple of 16. Returns the number of padding bytes written, which
+ * is 0 when @len is already a multiple of 16.
+ */
+static int tegra_ccm_add_padding(u8 *buf, unsigned int len)
+{
+	unsigned int padlen = 16 - (len % 16);
+	u8 padding[16] = {0};
+
+	if (padlen == 16)
+		return 0;
+
+	memcpy(buf, padding, padlen);
+
+	return padlen;
+}
+
+/*
+ * Lay out the start of the CBC-MAC input in rctx->inbuf: the formatted
+ * first block, then (when there is associated data) its length header,
+ * the associated data itself and zero padding to a 16-byte boundary.
+ *
+ * Returns the number of bytes written, or a negative error from
+ * tegra_ccm_format_nonce().
+ * NOTE(review): assoclen is not checked against the inbuf size here.
+ */
+static int tegra_ccm_format_blocks(struct tegra_aead_reqctx *rctx)
+{
+	unsigned int alen = 0, offset = 0;
+	u8 nonce[16], adata[16];
+	int ret;
+
+	ret = tegra_ccm_format_nonce(rctx, nonce);
+	if (ret)
+		return ret;
+
+	memcpy(rctx->inbuf.buf, nonce, 16);
+	offset = 16;
+
+	if (rctx->assoclen) {
+		alen = tegra_ccm_format_adata(adata, rctx->assoclen);
+		memcpy(rctx->inbuf.buf + offset, adata, alen);
+		offset += alen;
+
+		scatterwalk_map_and_copy(rctx->inbuf.buf + offset,
+					 rctx->src_sg, 0, rctx->assoclen, 0);
+
+		offset += rctx->assoclen;
+		/* the header and the data are padded together as one unit */
+		offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset,
+					 rctx->assoclen + alen);
+	}
+
+	return offset;
+}
+
+/*
+ * Fetch the CBC-MAC value from the result registers and clear them.
+ *
+ * On encryption the first rctx->authsize bytes are saved in
+ * rctx->authdata. On decryption they are compared against rctx->authdata
+ * with crypto_memneq(); a mismatch returns -EBADMSG.
+ */
+static int tegra_ccm_mac_result(struct tegra_se *se, struct tegra_aead_reqctx *rctx)
+{
+	u32 result[16];
+	int i, ret;
+
+	/* Read and clear Result */
+	for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+		result[i] = readl(se->base + se->hw->regs->result + (i * 4));
+
+	for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+		writel(0, se->base + se->hw->regs->result + (i * 4));
+
+	if (rctx->encrypt) {
+		memcpy(rctx->authdata,
result, rctx->authsize);
+	} else {
+		ret = crypto_memneq(rctx->authdata, result, rctx->authsize);
+		if (ret)
+			return -EBADMSG;
+	}
+
+	return 0;
+}
+
+/*
+ * Distribute the CTR output held in rctx->outbuf.
+ *
+ * The first 16 bytes of outbuf hold the processed tag block and the
+ * payload follows at offset 16 (this mirrors the input layout built by
+ * tegra_ccm_do_ctr()). The payload is always copied to the destination
+ * scatterlist after the associated data. On encryption the tag is appended
+ * to the destination; on decryption it is kept in rctx->authdata for the
+ * later comparison. Always returns 0; @se is not used.
+ */
+static int tegra_ccm_ctr_result(struct tegra_se *se, struct tegra_aead_reqctx *rctx)
+{
+	/* Copy result */
+	scatterwalk_map_and_copy(rctx->outbuf.buf + 16, rctx->dst_sg,
+				 rctx->assoclen, rctx->cryptlen, 1);
+
+	if (rctx->encrypt)
+		scatterwalk_map_and_copy(rctx->outbuf.buf, rctx->dst_sg,
+					 rctx->assoclen + rctx->cryptlen,
+					 rctx->authsize, 1);
+	else
+		memcpy(rctx->authdata, rctx->outbuf.buf, rctx->authsize);
+
+	return 0;
+}
+
+/*
+ * CBC-MAC pass of a CCM request.
+ *
+ * Formats the header blocks, appends the plaintext (read from the source
+ * on encryption and from the destination on decryption, where the CTR
+ * pass has already written it) plus zero padding, runs the CBC-MAC and
+ * then stores or verifies the result via tegra_ccm_mac_result().
+ *
+ * Any formatting failure is reported as -EINVAL.
+ */
+static int tegra_ccm_compute_auth(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+	struct tegra_se *se = ctx->se;
+	struct scatterlist *sg;
+	int offset, ret;
+
+	offset = tegra_ccm_format_blocks(rctx);
+	if (offset < 0)
+		return -EINVAL;
+
+	/* Copy plain text to the buffer */
+	sg = rctx->encrypt ? rctx->src_sg : rctx->dst_sg;
+
+	scatterwalk_map_and_copy(rctx->inbuf.buf + offset,
+				 sg, rctx->assoclen,
+				 rctx->cryptlen, 0);
+	offset += rctx->cryptlen;
+	offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->cryptlen);
+
+	rctx->inbuf.size = offset;
+
+	ret = tegra_ccm_do_cbcmac(ctx, rctx);
+	if (ret)
+		return ret;
+
+	return tegra_ccm_mac_result(se, rctx);
+}
+
+/*
+ * CTR pass of a CCM request.
+ *
+ * The input buffer is laid out as the tag (computed tag on encryption,
+ * received tag read from the source on decryption) padded to 16 bytes,
+ * followed by the payload padded to 16 bytes. After the hardware run the
+ * output is distributed by tegra_ccm_ctr_result().
+ */
+static int tegra_ccm_do_ctr(struct tegra_aead_ctx *ctx, struct tegra_aead_reqctx *rctx)
+{
+	struct tegra_se *se = ctx->se;
+	unsigned int cmdlen, offset = 0;
+	struct scatterlist *sg = rctx->src_sg;
+	int ret;
+
+	rctx->config = tegra234_aes_cfg(SE_ALG_CTR, rctx->encrypt);
+	rctx->crypto_config = tegra234_aes_crypto_cfg(SE_ALG_CTR, rctx->encrypt) |
+			      SE_AES_KEY_INDEX(ctx->key_id);
+
+	/* Copy authdata in the top of buffer for encryption/decryption */
+	if (rctx->encrypt)
+		memcpy(rctx->inbuf.buf, rctx->authdata, rctx->authsize);
+	else
+		scatterwalk_map_and_copy(rctx->inbuf.buf, sg,
+					 rctx->assoclen + rctx->cryptlen,
+					 rctx->authsize, 0);
+
+	offset += rctx->authsize;
+	offset +=
tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->authsize);
+
+	/* If there is no cryptlen, proceed to submit the task */
+	if (rctx->cryptlen) {
+		scatterwalk_map_and_copy(rctx->inbuf.buf + offset, sg,
+					 rctx->assoclen, rctx->cryptlen, 0);
+		offset += rctx->cryptlen;
+		offset += tegra_ccm_add_padding(rctx->inbuf.buf + offset, rctx->cryptlen);
+	}
+
+	rctx->inbuf.size = offset;
+
+	/* Prepare command and submit */
+	cmdlen = tegra_ctr_prep_cmd(ctx, rctx);
+	ret = tegra_se_host1x_submit(se, cmdlen);
+	if (ret)
+		return ret;
+
+	return tegra_ccm_ctr_result(se, rctx);
+}
+
+/*
+ * Per-request CCM setup: capture the scatterlists, lengths and tag size in
+ * @rctx, copy and validate the IV, zero the counter field of the IV, and
+ * clear the hardware result registers.
+ *
+ * Returns 0, or -EINVAL when the IV fails tegra_ccm_check_iv().
+ */
+static int tegra_ccm_crypt_init(struct aead_request *req, struct tegra_se *se,
+				struct tegra_aead_reqctx *rctx)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	u8 *iv = (u8 *)rctx->iv;
+	int ret, i;
+
+	rctx->src_sg = req->src;
+	rctx->dst_sg = req->dst;
+	rctx->assoclen = req->assoclen;
+	rctx->authsize = crypto_aead_authsize(tfm);
+
+	memcpy(iv, req->iv, 16);
+
+	ret = tegra_ccm_check_iv(iv);
+	if (ret)
+		return ret;
+
+	/* Note: rfc 3610 and NIST 800-38C require counter (ctr_0) of
+	 * zero to encrypt auth tag.
+	 * req->iv has the formatted ctr_0 (i.e. Flags || N || 0).
+	 */
+	memset(iv + 15 - iv[0], 0, iv[0] + 1);
+
+	/* Clear any previous result */
+	for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+		writel(0, se->base + se->hw->regs->result + (i * 4));
+
+	return 0;
+}
+
+/*
+ * Crypto engine callback for one CCM request.
+ *
+ * Allocates the DMA bounce buffers, runs CBC-MAC then CTR for encryption
+ * (CTR then CBC-MAC for decryption), frees the buffers and finalizes the
+ * request with the resulting status.
+ *
+ * The request is finalized on every path, including allocation failure,
+ * so the engine is never left holding an uncompleted request. The
+ * function itself therefore always returns 0.
+ */
+static int tegra_ccm_do_one_req(struct crypto_engine *engine, void *areq)
+{
+	struct aead_request *req = container_of(areq, struct aead_request, base);
+	struct tegra_aead_reqctx *rctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct tegra_se *se = ctx->se;
+	int ret;
+
+	/* Allocate buffers required */
+	rctx->inbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN,
+					     &rctx->inbuf.addr, GFP_KERNEL);
+	if (!rctx->inbuf.buf) {
+		ret = -ENOMEM;
+		goto out_finalize;
+	}
+
+	rctx->inbuf.size = SE_AES_BUFLEN;
+
+	rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN,
+					      &rctx->outbuf.addr, GFP_KERNEL);
+	if (!rctx->outbuf.buf) {
+		ret = -ENOMEM;
+		goto outbuf_err;
+	}
+
+	rctx->outbuf.size = SE_AES_BUFLEN;
+
+	ret = tegra_ccm_crypt_init(req, se, rctx);
+	if (ret)
+		goto out;
+
+	if (rctx->encrypt) {
+		rctx->cryptlen = req->cryptlen;
+
+		/* CBC MAC Operation */
+		ret = tegra_ccm_compute_auth(ctx, rctx);
+		if (ret)
+			goto out;
+
+		/* CTR operation */
+		ret = tegra_ccm_do_ctr(ctx, rctx);
+		if (ret)
+			goto out;
+	} else {
+		/* On decryption req->cryptlen includes the trailing tag */
+		rctx->cryptlen = req->cryptlen - ctx->authsize;
+
+		/* CTR operation */
+		ret = tegra_ccm_do_ctr(ctx, rctx);
+		if (ret)
+			goto out;
+
+		/* CBC MAC Operation */
+		ret = tegra_ccm_compute_auth(ctx, rctx);
+		if (ret)
+			goto out;
+	}
+
+out:
+	dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+			  rctx->outbuf.buf, rctx->outbuf.addr);
+
+outbuf_err:
+	dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+			  rctx->inbuf.buf, rctx->inbuf.addr);
+
+out_finalize:
+	crypto_finalize_aead_request(ctx->se->engine, req, ret);
+
+	return 0;
+}
+
+static int tegra_gcm_do_one_req(struct crypto_engine *engine, void *areq)
+{
+	struct aead_request *req = container_of(areq, struct aead_request,
base);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct tegra_aead_reqctx *rctx = aead_request_ctx(req);
+	int ret;
+
+	/*
+	 * Crypto engine callback for one GCM request: GMAC over the
+	 * associated data (if any), GCM over the payload (if any), then
+	 * GCM_FINAL for the tag, which is verified on decryption.
+	 *
+	 * The request is finalized on every path, including allocation
+	 * failure, so this function always returns 0.
+	 */
+
+	/* Allocate buffers required */
+	rctx->inbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN,
+					     &rctx->inbuf.addr, GFP_KERNEL);
+	if (!rctx->inbuf.buf) {
+		ret = -ENOMEM;
+		goto out_finalize;
+	}
+
+	rctx->inbuf.size = SE_AES_BUFLEN;
+
+	rctx->outbuf.buf = dma_alloc_coherent(ctx->se->dev, SE_AES_BUFLEN,
+					      &rctx->outbuf.addr, GFP_KERNEL);
+	if (!rctx->outbuf.buf) {
+		ret = -ENOMEM;
+		goto outbuf_err;
+	}
+
+	rctx->outbuf.size = SE_AES_BUFLEN;
+
+	rctx->src_sg = req->src;
+	rctx->dst_sg = req->dst;
+	rctx->assoclen = req->assoclen;
+	rctx->authsize = crypto_aead_authsize(tfm);
+
+	/* On decryption req->cryptlen includes the trailing tag */
+	if (rctx->encrypt)
+		rctx->cryptlen = req->cryptlen;
+	else
+		rctx->cryptlen = req->cryptlen - ctx->authsize;
+
+	/* The fourth counter word is set to (1 << 24) after the IV words */
+	memcpy(rctx->iv, req->iv, GCM_AES_IV_SIZE);
+	rctx->iv[3] = (1 << 24);
+
+	/* If there is associated data perform GMAC operation */
+	if (rctx->assoclen) {
+		ret = tegra_gcm_do_gmac(ctx, rctx);
+		if (ret)
+			goto out;
+	}
+
+	/* GCM Encryption/Decryption operation */
+	if (rctx->cryptlen) {
+		ret = tegra_gcm_do_crypt(ctx, rctx);
+		if (ret)
+			goto out;
+	}
+
+	/* GCM_FINAL operation */
+	ret = tegra_gcm_do_final(ctx, rctx);
+	if (ret)
+		goto out;
+
+	if (!rctx->encrypt)
+		ret = tegra_gcm_do_verify(ctx->se, rctx);
+
+out:
+	dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+			  rctx->outbuf.buf, rctx->outbuf.addr);
+
+outbuf_err:
+	dma_free_coherent(ctx->se->dev, SE_AES_BUFLEN,
+			  rctx->inbuf.buf, rctx->inbuf.addr);
+
+out_finalize:
+	/* Always finalize; ret carries success or the first error */
+	crypto_finalize_aead_request(ctx->se->engine, req, ret);
+
+	return 0;
+}
+
+/*
+ * AEAD transform init: set the request context size, bind the transform
+ * to its engine instance, reset the key slot id and resolve the algorithm
+ * id from the algorithm name.
+ *
+ * Returns 0, or the negative value from se_algname_to_algid().
+ */
+static int tegra_aead_cra_init(struct crypto_aead *tfm)
+{
+	struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct aead_alg *alg = crypto_aead_alg(tfm);
+	struct tegra_se_alg *se_alg;
+	const char *algname;
+	int ret;
+
+	algname = crypto_tfm_alg_name(&tfm->base);
+
+	se_alg =
container_of(alg, struct tegra_se_alg, alg.aead.base);
+
+	crypto_aead_set_reqsize(tfm, sizeof(struct tegra_aead_reqctx));
+
+	ctx->se = se_alg->se_dev;
+	ctx->key_id = 0;
+
+	ret = se_algname_to_algid(algname);
+	if (ret < 0) {
+		dev_err(ctx->se->dev, "invalid algorithm\n");
+		return ret;
+	}
+
+	ctx->alg = ret;
+
+	return 0;
+}
+
+/*
+ * Accept the even CCM tag sizes from 4 to 16 bytes and record the choice
+ * in the transform context; any other size returns -EINVAL.
+ */
+static int tegra_ccm_setauthsize(struct crypto_aead *tfm,  unsigned int authsize)
+{
+	struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+	switch (authsize) {
+	case 4:
+	case 6:
+	case 8:
+	case 10:
+	case 12:
+	case 14:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ctx->authsize = authsize;
+
+	return 0;
+}
+
+/*
+ * Record the GCM tag size in the transform context after it passes
+ * crypto_gcm_check_authsize(); otherwise return that helper's error.
+ */
+static int tegra_gcm_setauthsize(struct crypto_aead *tfm,  unsigned int authsize)
+{
+	struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	int ret;
+
+	ret = crypto_gcm_check_authsize(authsize);
+	if (ret)
+		return ret;
+
+	ctx->authsize = authsize;
+
+	return 0;
+}
+
+/* AEAD transform teardown: invalidate the key slot if one was assigned */
+static void tegra_aead_cra_exit(struct crypto_aead *tfm)
+{
+	struct tegra_aead_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+
+	if (ctx->key_id)
+		tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg);
+}
+
+/*
+ * Common entry point for AEAD encrypt/decrypt: record the direction in
+ * the request context and queue the request on the crypto engine.
+ */
+static int tegra_aead_crypt(struct aead_request *req, bool encrypt)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct tegra_aead_reqctx *rctx = aead_request_ctx(req);
+
+	rctx->encrypt = encrypt;
+
+	return crypto_transfer_aead_request_to_engine(ctx->se->engine, req);
+}
+
+/* aead .encrypt callback */
+static int tegra_aead_encrypt(struct aead_request *req)
+{
+	return tegra_aead_crypt(req, true);
+}
+
+/* aead .decrypt callback */
+static int tegra_aead_decrypt(struct aead_request *req)
+{
+	return tegra_aead_crypt(req, false);
+}
+
+/*
+ * Set the AEAD key. Returns -EINVAL when aes_check_keylen() rejects
+ * @keylen, otherwise the result of tegra_key_submit(), which records the
+ * slot in ctx->key_id.
+ */
+static int tegra_aead_setkey(struct crypto_aead *tfm,
+			     const u8 *key, u32 keylen)
+{
+	struct tegra_aead_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (aes_check_keylen(keylen)) {
+		dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen);
+		return -EINVAL;
+	}
+
+	return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id);
+}
+
+/*
+ * Write the CMAC command sequence for rctx->datbuf into the engine
+ * command buffer.
+ *
+ * When SHA_UPDATE is not set in rctx->task the FINAL op flag is added and
+ * the residue bit count is programmed. When SHA_FIRST is set it is
+ * cleared here and a zero IV load is emitted first.
+ *
+ * Returns the number of 32-bit words written.
+ */
+static unsigned int tegra_cmac_prep_cmd(struct tegra_cmac_ctx *ctx,
+					struct tegra_cmac_reqctx *rctx)
+{
+	unsigned int data_count, res_bits = 0, i = 0, j;
+	struct tegra_se *se = ctx->se;
+	u32 *cpuvaddr = se->cmdbuf->addr, op;
+
+	data_count = (rctx->datbuf.size / AES_BLOCK_SIZE);
+
+	op = SE_AES_OP_WRSTALL | SE_AES_OP_START | SE_AES_OP_LASTBUF;
+
+	if (!(rctx->task & SHA_UPDATE)) {
+		op |= SE_AES_OP_FINAL;
+		res_bits = (rctx->datbuf.size % AES_BLOCK_SIZE) * 8;
+	}
+
+	/* guard on data_count so an empty buffer does not wrap below zero */
+	if (!res_bits && data_count)
+		data_count--;
+
+	if (rctx->task & SHA_FIRST) {
+		rctx->task &= ~SHA_FIRST;
+
+		cpuvaddr[i++] = host1x_opcode_setpayload(SE_CRYPTO_CTR_REG_COUNT);
+		cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->linear_ctr);
+		/* Load 0 IV */
+		for (j = 0; j < SE_CRYPTO_CTR_REG_COUNT; j++)
+			cpuvaddr[i++] = 0;
+	}
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->last_blk, 1);
+	cpuvaddr[i++] = SE_LAST_BLOCK_VAL(data_count) |
+			SE_LAST_BLOCK_RES_BITS(res_bits);
+
+	cpuvaddr[i++] = se_host1x_opcode_incr(se->hw->regs->config, 6);
+	cpuvaddr[i++] = rctx->config;
+	cpuvaddr[i++] = rctx->crypto_config;
+
+	/* Source Address */
+	cpuvaddr[i++] = lower_32_bits(rctx->datbuf.addr);
+	cpuvaddr[i++] = SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) |
+			SE_ADDR_HI_SZ(rctx->datbuf.size);
+	/* Destination address low word is 0; callers read the result registers */
+	cpuvaddr[i++] = 0;
+	cpuvaddr[i++] = SE_ADDR_HI_SZ(AES_BLOCK_SIZE);
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(se->hw->regs->op, 1);
+	cpuvaddr[i++] = op;
+
+	cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1);
+	cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) |
+			host1x_uclass_incr_syncpt_indx_f(se->syncpt_id);
+
+	return i;
+}
+
+/* Save the hardware result registers into rctx->result */
+static void tegra_cmac_copy_result(struct tegra_se *se, struct tegra_cmac_reqctx *rctx)
+{
+	int i;
+
+	for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+		rctx->result[i] = readl(se->base + se->hw->regs->result + (i * 4));
+}
+
+/* Restore the hardware result registers from rctx->result */
+static void tegra_cmac_paste_result(struct tegra_se *se, struct tegra_cmac_reqctx
*rctx)
+{
+	int i;
+
+	for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+		writel(rctx->result[i],
+		       se->base + se->hw->regs->result + (i * 4));
+}
+
+/*
+ * Process the data of one update() call.
+ *
+ * New data is combined with the bytes held back from earlier calls. Whole
+ * blocks are sent to the hardware, except that at least one byte (a full
+ * block when the total is block aligned) is always kept in the residue
+ * buffer for final(). If less than one block would be processed, the data
+ * is only appended to the residue buffer.
+ *
+ * Returns 0 or the error from tegra_se_host1x_submit().
+ */
+static int tegra_cmac_do_update(struct ahash_request *req)
+{
+	struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct tegra_se *se = ctx->se;
+	unsigned int nblks, nresidue, cmdlen;
+	int ret;
+
+	if (!req->nbytes)
+		return 0;
+
+	nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size;
+	nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size;
+
+	/*
+	 * Reserve the last block as residue during final() to process.
+	 */
+	if (!nresidue && nblks) {
+		nresidue += rctx->blk_size;
+		nblks--;
+	}
+
+	rctx->src_sg = req->src;
+	rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue;
+	rctx->total_len += rctx->datbuf.size;
+	rctx->config = tegra234_aes_cfg(SE_ALG_CMAC, 0);
+	rctx->crypto_config = SE_AES_KEY_INDEX(ctx->key_id);
+
+	/*
+	 * Keep one block and residue bytes in residue and
+	 * return. The bytes will be processed in final()
+	 */
+	if (nblks < 1) {
+		scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size,
+					 rctx->src_sg, 0, req->nbytes, 0);
+
+		rctx->residue.size += req->nbytes;
+		return 0;
+	}
+
+	/* Copy the previous residue first */
+	if (rctx->residue.size)
+		memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size);
+
+	scatterwalk_map_and_copy(rctx->datbuf.buf + rctx->residue.size,
+				 rctx->src_sg, 0, req->nbytes - nresidue, 0);
+
+	scatterwalk_map_and_copy(rctx->residue.buf, rctx->src_sg,
+				 req->nbytes - nresidue, nresidue, 0);
+
+	/* Update residue value with the residue after current block */
+	rctx->residue.size = nresidue;
+
+	/*
+	 * If this is not the first 'update' call, paste the previous copied
+	 * intermediate results to the registers so that it gets picked up.
+	 * This is to support the import/export functionality.
+	 */
+	if (!(rctx->task & SHA_FIRST))
+		tegra_cmac_paste_result(ctx->se, rctx);
+
+	cmdlen = tegra_cmac_prep_cmd(ctx, rctx);
+
+	ret = tegra_se_host1x_submit(se, cmdlen);
+	/*
+	 * If this is not the final update, copy the intermediate results
+	 * from the registers so that it can be used in the next 'update'
+	 * call. This is to support the import/export functionality.
+	 */
+	if (!(rctx->task & SHA_FINAL))
+		tegra_cmac_copy_result(ctx->se, rctx);
+
+	return ret;
+}
+
+/*
+ * Produce the final CMAC value in req->result and release the request's
+ * DMA buffers.
+ *
+ * A completely empty message is delegated to the software fallback when
+ * one is available. Otherwise the residue bytes are run through the
+ * hardware and the result registers are read and cleared.
+ *
+ * Both DMA buffers are freed on every path, including the fallback one.
+ */
+static int tegra_cmac_do_final(struct ahash_request *req)
+{
+	struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct tegra_se *se = ctx->se;
+	u32 *result = (u32 *)req->result;
+	int ret = 0, i, cmdlen;
+
+	if (!req->nbytes && !rctx->total_len && ctx->fallback_tfm) {
+		ret = crypto_shash_tfm_digest(ctx->fallback_tfm,
+					      rctx->datbuf.buf, 0, req->result);
+		goto out;
+	}
+
+	memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size);
+	rctx->datbuf.size = rctx->residue.size;
+	rctx->total_len += rctx->residue.size;
+	rctx->config = tegra234_aes_cfg(SE_ALG_CMAC, 0);
+	/*
+	 * tegra_cmac_do_update() returns before setting crypto_config when
+	 * it is given no data, so program the key index here as well.
+	 */
+	rctx->crypto_config = SE_AES_KEY_INDEX(ctx->key_id);
+
+	/* Prepare command and submit */
+	cmdlen = tegra_cmac_prep_cmd(ctx, rctx);
+	ret = tegra_se_host1x_submit(se, cmdlen);
+	if (ret)
+		goto out;
+
+	/* Read and clear Result register */
+	for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+		result[i] = readl(se->base + se->hw->regs->result + (i * 4));
+
+	for (i = 0; i < CMAC_RESULT_REG_COUNT; i++)
+		writel(0, se->base + se->hw->regs->result + (i * 4));
+
+out:
+	dma_free_coherent(se->dev, SE_SHA_BUFLEN,
+			  rctx->datbuf.buf, rctx->datbuf.addr);
+	dma_free_coherent(se->dev, crypto_ahash_blocksize(tfm) * 2,
+			  rctx->residue.buf, rctx->residue.addr);
+	return ret;
+}
+
+static int tegra_cmac_do_one_req(struct crypto_engine *engine, void *areq)
+{
+	struct ahash_request *req = ahash_request_cast(areq);
+	struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm =
crypto_ahash_reqtfm(req);
+	struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct tegra_se *se = ctx->se;
+	/* Stays 0 if neither task bit is set, so finalize gets a defined status */
+	int ret = 0;
+
+	/*
+	 * Crypto engine callback: run the update and/or final step that the
+	 * submitting entry point flagged in rctx->task, clear the flag, and
+	 * finalize the request with the status of the last step run.
+	 */
+	if (rctx->task & SHA_UPDATE) {
+		ret = tegra_cmac_do_update(req);
+		rctx->task &= ~SHA_UPDATE;
+	}
+
+	if (rctx->task & SHA_FINAL) {
+		ret = tegra_cmac_do_final(req);
+		rctx->task &= ~SHA_FINAL;
+	}
+
+	crypto_finalize_hash_request(se->engine, req, ret);
+
+	return 0;
+}
+
+/*
+ * Try to allocate a software shash for @algname to serve as fallback.
+ *
+ * Failure is not fatal: a warning is logged and ctx->fallback_tfm is left
+ * NULL. On success the ahash state size is raised to the fallback's state
+ * size when that is larger than struct tegra_cmac_reqctx.
+ */
+static void tegra_cmac_init_fallback(struct crypto_ahash *tfm, struct tegra_cmac_ctx *ctx,
+				     const char *algname)
+{
+	unsigned int statesize;
+
+	ctx->fallback_tfm = crypto_alloc_shash(algname, 0, CRYPTO_ALG_NEED_FALLBACK);
+
+	if (IS_ERR(ctx->fallback_tfm)) {
+		dev_warn(ctx->se->dev, "failed to allocate fallback for %s\n", algname);
+		ctx->fallback_tfm = NULL;
+		return;
+	}
+
+	statesize = crypto_shash_statesize(ctx->fallback_tfm);
+
+	if (statesize > sizeof(struct tegra_cmac_reqctx))
+		crypto_ahash_set_statesize(tfm, statesize);
+}
+
+/*
+ * CMAC transform init: set the request context size, bind the transform
+ * to its engine instance, reset the key slot id, resolve the algorithm id
+ * from the algorithm name and set up the optional software fallback.
+ *
+ * Returns 0, or the negative value from se_algname_to_algid().
+ */
+static int tegra_cmac_cra_init(struct crypto_tfm *tfm)
+{
+	struct tegra_cmac_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_ahash *ahash_tfm = __crypto_ahash_cast(tfm);
+	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+	struct tegra_se_alg *se_alg;
+	const char *algname;
+	int ret;
+
+	algname = crypto_tfm_alg_name(tfm);
+	se_alg = container_of(alg, struct tegra_se_alg, alg.ahash.base);
+
+	crypto_ahash_set_reqsize(ahash_tfm, sizeof(struct tegra_cmac_reqctx));
+
+	ctx->se = se_alg->se_dev;
+	ctx->key_id = 0;
+
+	ret = se_algname_to_algid(algname);
+	if (ret < 0) {
+		dev_err(ctx->se->dev, "invalid algorithm\n");
+		return ret;
+	}
+
+	ctx->alg = ret;
+
+	tegra_cmac_init_fallback(ahash_tfm, ctx, algname);
+
+	return 0;
+}
+
+/*
+ * CMAC transform teardown: free the fallback (if any) and invalidate the
+ * key slot. The slot is only invalidated when one was assigned, matching
+ * the AES and AEAD exit handlers.
+ */
+static void tegra_cmac_cra_exit(struct crypto_tfm *tfm)
+{
+	struct tegra_cmac_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	if (ctx->fallback_tfm)
+		crypto_free_shash(ctx->fallback_tfm);
+
+	if (ctx->key_id)
+		tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg);
+}
+
+static int tegra_cmac_init(struct ahash_request *req)
+{
+
struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + int i; + + rctx->total_len = 0; + rctx->datbuf.size = 0; + rctx->residue.size = 0; + rctx->task = SHA_FIRST; + rctx->blk_size = crypto_ahash_blocksize(tfm); + + rctx->residue.buf = dma_alloc_coherent(se->dev, rctx->blk_size * 2, + &rctx->residue.addr, GFP_KERNEL); + if (!rctx->residue.buf) + goto resbuf_fail; + + rctx->residue.size = 0; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_SHA_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + goto datbuf_fail; + + rctx->datbuf.size = 0; + + /* Clear any previous result */ + for (i = 0; i < CMAC_RESULT_REG_COUNT; i++) + writel(0, se->base + se->hw->regs->result + (i * 4)); + + return 0; + +datbuf_fail: + dma_free_coherent(se->dev, rctx->blk_size, rctx->residue.buf, + rctx->residue.addr); +resbuf_fail: + return -ENOMEM; +} + +static int tegra_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + + if (aes_check_keylen(keylen)) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + if (ctx->fallback_tfm) + crypto_shash_setkey(ctx->fallback_tfm, key, keylen); + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id); +} + +static int tegra_cmac_update(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + rctx->task |= SHA_UPDATE; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_final(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = 
ahash_request_ctx(req); + + rctx->task |= SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_finup(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_digest(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_cmac_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + tegra_cmac_init(req); + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_cmac_export(struct ahash_request *req, void *out) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + memcpy(out, rctx, sizeof(*rctx)); + + return 0; +} + +static int tegra_cmac_import(struct ahash_request *req, const void *in) +{ + struct tegra_cmac_reqctx *rctx = ahash_request_ctx(req); + + memcpy(rctx, in, sizeof(*rctx)); + + return 0; +} + +static struct tegra_se_alg tegra_aead_algs[] = { + { + .alg.aead.op.do_one_request = tegra_gcm_do_one_req, + .alg.aead.base = { + .init = tegra_aead_cra_init, + .exit = tegra_aead_cra_exit, + .setkey = tegra_aead_setkey, + .setauthsize = tegra_gcm_setauthsize, + .encrypt = tegra_aead_encrypt, + .decrypt = tegra_aead_decrypt, + .maxauthsize = AES_BLOCK_SIZE, + .ivsize = GCM_AES_IV_SIZE, + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "gcm-aes-tegra", + .cra_priority = 500, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct tegra_aead_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + }, { + .alg.aead.op.do_one_request = tegra_ccm_do_one_req, + .alg.aead.base = { + .init = tegra_aead_cra_init, + .exit = tegra_aead_cra_exit, + 
.setkey = tegra_aead_setkey, + .setauthsize = tegra_ccm_setauthsize, + .encrypt = tegra_aead_encrypt, + .decrypt = tegra_aead_decrypt, + .maxauthsize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .chunksize = AES_BLOCK_SIZE, + .base = { + .cra_name = "ccm(aes)", + .cra_driver_name = "ccm-aes-tegra", + .cra_priority = 500, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct tegra_aead_ctx), + .cra_alignmask = 0xf, + .cra_module = THIS_MODULE, + }, + } + } +}; + +static struct tegra_se_alg tegra_cmac_algs[] = { + { + .alg.ahash.op.do_one_request = tegra_cmac_do_one_req, + .alg.ahash.base = { + .init = tegra_cmac_init, + .setkey = tegra_cmac_setkey, + .update = tegra_cmac_update, + .final = tegra_cmac_final, + .finup = tegra_cmac_finup, + .digest = tegra_cmac_digest, + .export = tegra_cmac_export, + .import = tegra_cmac_import, + .halg.digestsize = AES_BLOCK_SIZE, + .halg.statesize = sizeof(struct tegra_cmac_reqctx), + .halg.base = { + .cra_name = "cmac(aes)", + .cra_driver_name = "tegra-se-cmac", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_cmac_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_cmac_cra_init, + .cra_exit = tegra_cmac_cra_exit, + } + } + } +}; + +int tegra_init_aes(struct tegra_se *se) +{ + struct aead_engine_alg *aead_alg; + struct ahash_engine_alg *ahash_alg; + struct skcipher_engine_alg *sk_alg; + int i, ret; + + se->manifest = tegra_aes_kac_manifest; + + for (i = 0; i < ARRAY_SIZE(tegra_aes_algs); i++) { + sk_alg = &tegra_aes_algs[i].alg.skcipher; + tegra_aes_algs[i].se_dev = se; + + ret = crypto_engine_register_skcipher(sk_alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + sk_alg->base.base.cra_name); + goto err_aes; + } + } + + for (i = 0; i < ARRAY_SIZE(tegra_aead_algs); i++) { + aead_alg = &tegra_aead_algs[i].alg.aead; + tegra_aead_algs[i].se_dev = se; + + ret = crypto_engine_register_aead(aead_alg); + if (ret) 
{ + dev_err(se->dev, "failed to register %s\n", + aead_alg->base.base.cra_name); + goto err_aead; + } + } + + for (i = 0; i < ARRAY_SIZE(tegra_cmac_algs); i++) { + ahash_alg = &tegra_cmac_algs[i].alg.ahash; + tegra_cmac_algs[i].se_dev = se; + + ret = crypto_engine_register_ahash(ahash_alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + ahash_alg->base.halg.base.cra_name); + goto err_cmac; + } + } + + return 0; + +err_cmac: + while (i--) + crypto_engine_unregister_ahash(&tegra_cmac_algs[i].alg.ahash); + + i = ARRAY_SIZE(tegra_aead_algs); +err_aead: + while (i--) + crypto_engine_unregister_aead(&tegra_aead_algs[i].alg.aead); + + i = ARRAY_SIZE(tegra_aes_algs); +err_aes: + while (i--) + crypto_engine_unregister_skcipher(&tegra_aes_algs[i].alg.skcipher); + + return ret; +} + +void tegra_deinit_aes(struct tegra_se *se) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tegra_aes_algs); i++) + crypto_engine_unregister_skcipher(&tegra_aes_algs[i].alg.skcipher); + + for (i = 0; i < ARRAY_SIZE(tegra_aead_algs); i++) + crypto_engine_unregister_aead(&tegra_aead_algs[i].alg.aead); + + for (i = 0; i < ARRAY_SIZE(tegra_cmac_algs); i++) + crypto_engine_unregister_ahash(&tegra_cmac_algs[i].alg.ahash); +} diff --git a/drivers/crypto/tegra/tegra-se-hash.c b/drivers/crypto/tegra/tegra-se-hash.c new file mode 100644 index 0000000000..4d4bd727f4 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-hash.c @@ -0,0 +1,1060 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver to handle HASH algorithms using NVIDIA Security Engine. 
+ */ + +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> + +#include <crypto/aes.h> +#include <crypto/sha1.h> +#include <crypto/sha2.h> +#include <crypto/sha3.h> +#include <crypto/internal/des.h> +#include <crypto/engine.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/hash.h> + +#include "tegra-se.h" + +struct tegra_sha_ctx { + struct tegra_se *se; + unsigned int alg; + bool fallback; + u32 key_id; + struct crypto_ahash *fallback_tfm; +}; + +struct tegra_sha_reqctx { + struct scatterlist *src_sg; + struct tegra_se_datbuf datbuf; + struct tegra_se_datbuf residue; + struct tegra_se_datbuf digest; + unsigned int alg; + unsigned int config; + unsigned int total_len; + unsigned int blk_size; + unsigned int task; + u32 key_id; + u32 result[HASH_RESULT_REG_COUNT]; + struct ahash_request fallback_req; +}; + +static int tegra_sha_get_config(u32 alg) +{ + int cfg = 0; + + switch (alg) { + case SE_ALG_SHA1: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA1; + break; + + case SE_ALG_HMAC_SHA224: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA224: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA224; + break; + + case SE_ALG_HMAC_SHA256: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA256: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA256; + break; + + case SE_ALG_HMAC_SHA384: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA384: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA384; + break; + + case SE_ALG_HMAC_SHA512: + cfg |= SE_SHA_ENC_ALG_HMAC; + fallthrough; + case SE_ALG_SHA512: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA512; + break; + + case SE_ALG_SHA3_224: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_224; + break; + case SE_ALG_SHA3_256: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_256; + break; + case SE_ALG_SHA3_384: + cfg |= 
SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_384; + break; + case SE_ALG_SHA3_512: + cfg |= SE_SHA_ENC_ALG_SHA; + cfg |= SE_SHA_ENC_MODE_SHA3_512; + break; + default: + return -EINVAL; + } + + return cfg; +} + +static int tegra_sha_fallback_init(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_init(&rctx->fallback_req); +} + +static int tegra_sha_fallback_update(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + + return crypto_ahash_update(&rctx->fallback_req); +} + +static int tegra_sha_fallback_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + rctx->fallback_req.result = req->result; + + return crypto_ahash_final(&rctx->fallback_req); +} + +static int tegra_sha_fallback_finup(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags 
& + CRYPTO_TFM_REQ_MAY_SLEEP; + + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + rctx->fallback_req.result = req->result; + + return crypto_ahash_finup(&rctx->fallback_req); +} + +static int tegra_sha_fallback_digest(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + rctx->fallback_req.nbytes = req->nbytes; + rctx->fallback_req.src = req->src; + rctx->fallback_req.result = req->result; + + return crypto_ahash_digest(&rctx->fallback_req); +} + +static int tegra_sha_fallback_import(struct ahash_request *req, const void *in) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_import(&rctx->fallback_req, in); +} + +static int tegra_sha_fallback_export(struct ahash_request *req, void *out) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + ahash_request_set_tfm(&rctx->fallback_req, ctx->fallback_tfm); + rctx->fallback_req.base.flags = req->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + return crypto_ahash_export(&rctx->fallback_req, out); +} + +static int tegra_sha_prep_cmd(struct tegra_se *se, u32 *cpuvaddr, + struct tegra_sha_reqctx *rctx) +{ + u64 msg_len, msg_left; + int i = 0; + + msg_len = rctx->total_len * 8; + msg_left = rctx->datbuf.size * 8; + + /* + * If IN_ADDR_HI_0.SZ > SHA_MSG_LEFT_[0-3] to the HASH engine, + * HW treats it as the 
last buffer and process the data. + * Therefore, add an extra byte to msg_left if it is not the + * last buffer. + */ + if (rctx->task & SHA_UPDATE) { + msg_left += 8; + msg_len += 8; + } + + cpuvaddr[i++] = host1x_opcode_setpayload(8); + cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_MSG_LENGTH); + cpuvaddr[i++] = lower_32_bits(msg_len); + cpuvaddr[i++] = upper_32_bits(msg_len); + cpuvaddr[i++] = 0; + cpuvaddr[i++] = 0; + cpuvaddr[i++] = lower_32_bits(msg_left); + cpuvaddr[i++] = upper_32_bits(msg_left); + cpuvaddr[i++] = 0; + cpuvaddr[i++] = 0; + cpuvaddr[i++] = host1x_opcode_setpayload(6); + cpuvaddr[i++] = se_host1x_opcode_incr_w(SE_SHA_CFG); + cpuvaddr[i++] = rctx->config; + + if (rctx->task & SHA_FIRST) { + cpuvaddr[i++] = SE_SHA_TASK_HASH_INIT; + rctx->task &= ~SHA_FIRST; + } else { + cpuvaddr[i++] = 0; + } + + cpuvaddr[i++] = rctx->datbuf.addr; + cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->datbuf.addr)) | + SE_ADDR_HI_SZ(rctx->datbuf.size)); + cpuvaddr[i++] = rctx->digest.addr; + cpuvaddr[i++] = (u32)(SE_ADDR_HI_MSB(upper_32_bits(rctx->digest.addr)) | + SE_ADDR_HI_SZ(rctx->digest.size)); + if (rctx->key_id) { + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_CRYPTO_CFG); + cpuvaddr[i++] = SE_AES_KEY_INDEX(rctx->key_id); + } + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_nonincr_w(SE_SHA_OPERATION); + cpuvaddr[i++] = SE_SHA_OP_WRSTALL | + SE_SHA_OP_START | + SE_SHA_OP_LASTBUF; + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "msg len %llu msg left %llu cfg %#x", + msg_len, msg_left, rctx->config); + + return i; +} + +static void tegra_sha_copy_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx) +{ + int i; + + for (i = 0; i < HASH_RESULT_REG_COUNT; i++) + rctx->result[i] = readl(se->base + 
se->hw->regs->result + (i * 4)); +} + +static void tegra_sha_paste_hash_result(struct tegra_se *se, struct tegra_sha_reqctx *rctx) +{ + int i; + + for (i = 0; i < HASH_RESULT_REG_COUNT; i++) + writel(rctx->result[i], + se->base + se->hw->regs->result + (i * 4)); +} + +static int tegra_sha_do_update(struct ahash_request *req) +{ + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + unsigned int nblks, nresidue, size, ret; + u32 *cpuvaddr = ctx->se->cmdbuf->addr; + + nresidue = (req->nbytes + rctx->residue.size) % rctx->blk_size; + nblks = (req->nbytes + rctx->residue.size) / rctx->blk_size; + + /* + * If nbytes is a multiple of block size and there is no residue, + * then reserve the last block as residue during final() to process. + */ + if (!nresidue && nblks) { + nresidue = rctx->blk_size; + nblks--; + } + + rctx->src_sg = req->src; + rctx->datbuf.size = (req->nbytes + rctx->residue.size) - nresidue; + rctx->total_len += rctx->datbuf.size; + + /* + * If nbytes are less than a block size, copy it residue and + * return. 
The bytes will be processed in final() + */ + if (nblks < 1) { + scatterwalk_map_and_copy(rctx->residue.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes, 0); + + rctx->residue.size += req->nbytes; + return 0; + } + + /* Copy the previous residue first */ + if (rctx->residue.size) + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + + scatterwalk_map_and_copy(rctx->datbuf.buf + rctx->residue.size, + rctx->src_sg, 0, req->nbytes - nresidue, 0); + + scatterwalk_map_and_copy(rctx->residue.buf, rctx->src_sg, + req->nbytes - nresidue, nresidue, 0); + + /* Update residue value with the residue after current block */ + rctx->residue.size = nresidue; + + rctx->config = tegra_sha_get_config(rctx->alg) | + SE_SHA_DST_HASH_REG; + + /* + * If this is not the first 'update' call, paste the previous copied + * intermediate results to the registers so that it gets picked up. + * This is to support the import/export functionality. + */ + if (!(rctx->task & SHA_FIRST)) + tegra_sha_paste_hash_result(ctx->se, rctx); + + size = tegra_sha_prep_cmd(ctx->se, cpuvaddr, rctx); + + ret = tegra_se_host1x_submit(ctx->se, size); + + /* + * If this is not the final update, copy the intermediate results + * from the registers so that it can be used in the next 'update' + * call. This is to support the import/export functionality. 
+ */ + if (!(rctx->task & SHA_FINAL)) + tegra_sha_copy_hash_result(ctx->se, rctx); + + return ret; +} + +static int tegra_sha_do_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + u32 *cpuvaddr = se->cmdbuf->addr; + int size, ret = 0; + + memcpy(rctx->datbuf.buf, rctx->residue.buf, rctx->residue.size); + rctx->datbuf.size = rctx->residue.size; + rctx->total_len += rctx->residue.size; + + rctx->config = tegra_sha_get_config(rctx->alg) | + SE_SHA_DST_MEMORY; + + size = tegra_sha_prep_cmd(se, cpuvaddr, rctx); + + ret = tegra_se_host1x_submit(se, size); + if (ret) + goto out; + + /* Copy result */ + memcpy(req->result, rctx->digest.buf, rctx->digest.size); + +out: + dma_free_coherent(se->dev, SE_SHA_BUFLEN, + rctx->datbuf.buf, rctx->datbuf.addr); + dma_free_coherent(se->dev, crypto_ahash_blocksize(tfm), + rctx->residue.buf, rctx->residue.addr); + dma_free_coherent(se->dev, rctx->digest.size, rctx->digest.buf, + rctx->digest.addr); + return ret; +} + +static int tegra_sha_do_one_req(struct crypto_engine *engine, void *areq) +{ + struct ahash_request *req = ahash_request_cast(areq); + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + int ret = 0; + + if (rctx->task & SHA_UPDATE) { + ret = tegra_sha_do_update(req); + rctx->task &= ~SHA_UPDATE; + } + + if (rctx->task & SHA_FINAL) { + ret = tegra_sha_do_final(req); + rctx->task &= ~SHA_FINAL; + } + + crypto_finalize_hash_request(se->engine, req, ret); + + return 0; +} + +static void tegra_sha_init_fallback(struct crypto_ahash *tfm, struct tegra_sha_ctx *ctx, + const char *algname) +{ + unsigned int statesize; + + ctx->fallback_tfm = crypto_alloc_ahash(algname, 0, CRYPTO_ALG_ASYNC | + 
CRYPTO_ALG_NEED_FALLBACK); + + if (IS_ERR(ctx->fallback_tfm)) { + dev_warn(ctx->se->dev, + "failed to allocate fallback for %s\n", algname); + ctx->fallback_tfm = NULL; + return; + } + + statesize = crypto_ahash_statesize(ctx->fallback_tfm); + + if (statesize > sizeof(struct tegra_sha_reqctx)) + crypto_ahash_set_statesize(tfm, statesize); + + /* Update reqsize if fallback is added */ + crypto_ahash_set_reqsize(tfm, + sizeof(struct tegra_sha_reqctx) + + crypto_ahash_reqsize(ctx->fallback_tfm)); +} + +static int tegra_sha_cra_init(struct crypto_tfm *tfm) +{ + struct tegra_sha_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash_tfm = __crypto_ahash_cast(tfm); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); + struct tegra_se_alg *se_alg; + const char *algname; + int ret; + + algname = crypto_tfm_alg_name(tfm); + se_alg = container_of(alg, struct tegra_se_alg, alg.ahash.base); + + crypto_ahash_set_reqsize(ahash_tfm, sizeof(struct tegra_sha_reqctx)); + + ctx->se = se_alg->se_dev; + ctx->fallback = false; + ctx->key_id = 0; + + ret = se_algname_to_algid(algname); + if (ret < 0) { + dev_err(ctx->se->dev, "invalid algorithm\n"); + return ret; + } + + if (se_alg->alg_base) + tegra_sha_init_fallback(ahash_tfm, ctx, algname); + + ctx->alg = ret; + + return 0; +} + +static void tegra_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct tegra_sha_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback_tfm) + crypto_free_ahash(ctx->fallback_tfm); + + tegra_key_invalidate(ctx->se, ctx->key_id, ctx->alg); +} + +static int tegra_sha_init(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + struct tegra_se *se = ctx->se; + + if (ctx->fallback) + return tegra_sha_fallback_init(req); + + rctx->total_len = 0; + rctx->datbuf.size = 0; + rctx->residue.size = 0; + rctx->key_id = ctx->key_id; + rctx->task = SHA_FIRST; + rctx->alg = 
ctx->alg; + rctx->blk_size = crypto_ahash_blocksize(tfm); + rctx->digest.size = crypto_ahash_digestsize(tfm); + + rctx->digest.buf = dma_alloc_coherent(se->dev, rctx->digest.size, + &rctx->digest.addr, GFP_KERNEL); + if (!rctx->digest.buf) + goto digbuf_fail; + + rctx->residue.buf = dma_alloc_coherent(se->dev, rctx->blk_size, + &rctx->residue.addr, GFP_KERNEL); + if (!rctx->residue.buf) + goto resbuf_fail; + + rctx->datbuf.buf = dma_alloc_coherent(se->dev, SE_SHA_BUFLEN, + &rctx->datbuf.addr, GFP_KERNEL); + if (!rctx->datbuf.buf) + goto datbuf_fail; + + return 0; + +datbuf_fail: + dma_free_coherent(se->dev, rctx->blk_size, rctx->residue.buf, + rctx->residue.addr); +resbuf_fail: + dma_free_coherent(se->dev, SE_SHA_BUFLEN, rctx->datbuf.buf, + rctx->datbuf.addr); +digbuf_fail: + return -ENOMEM; +} + +static int tegra_hmac_fallback_setkey(struct tegra_sha_ctx *ctx, const u8 *key, + unsigned int keylen) +{ + if (!ctx->fallback_tfm) { + dev_dbg(ctx->se->dev, "invalid key length (%d)\n", keylen); + return -EINVAL; + } + + ctx->fallback = true; + return crypto_ahash_setkey(ctx->fallback_tfm, key, keylen); +} + +static int tegra_hmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (aes_check_keylen(keylen)) + return tegra_hmac_fallback_setkey(ctx, key, keylen); + + ctx->fallback = false; + + return tegra_key_submit(ctx->se, key, keylen, ctx->alg, &ctx->key_id); +} + +static int tegra_sha_update(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_update(req); + + rctx->task |= SHA_UPDATE; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_final(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct 
crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_final(req); + + rctx->task |= SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_finup(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_finup(req); + + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_digest(struct ahash_request *req) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_digest(req); + + tegra_sha_init(req); + rctx->task |= SHA_UPDATE | SHA_FINAL; + + return crypto_transfer_hash_request_to_engine(ctx->se->engine, req); +} + +static int tegra_sha_export(struct ahash_request *req, void *out) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_export(req, out); + + memcpy(out, rctx, sizeof(*rctx)); + + return 0; +} + +static int tegra_sha_import(struct ahash_request *req, const void *in) +{ + struct tegra_sha_reqctx *rctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct tegra_sha_ctx *ctx = crypto_ahash_ctx(tfm); + + if (ctx->fallback) + return tegra_sha_fallback_import(req, in); + + memcpy(rctx, in, sizeof(*rctx)); + + return 0; +} + +static struct tegra_se_alg tegra_hash_algs[] = { + { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + 
.update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "tegra-se-sha1", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "tegra-se-sha224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "tegra-se-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = 
THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "tegra-se-sha384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "tegra-se-sha512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-224", + 
.cra_driver_name = "tegra-se-sha3-224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-256", + .cra_driver_name = "tegra-se-sha3-256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-384", + .cra_driver_name = "tegra-se-sha3-384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + 
.digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .halg.digestsize = SHA3_512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "sha3-512", + .cra_driver_name = "tegra-se-sha3-512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH, + .cra_blocksize = SHA3_512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha224", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "tegra-se-hmac-sha224", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha256", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "tegra-se-hmac-sha256", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + 
.cra_blocksize = SHA256_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha384", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "tegra-se-hmac-sha384", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + }, { + .alg_base = "sha512", + .alg.ahash.op.do_one_request = tegra_sha_do_one_req, + .alg.ahash.base = { + .init = tegra_sha_init, + .update = tegra_sha_update, + .final = tegra_sha_final, + .finup = tegra_sha_finup, + .digest = tegra_sha_digest, + .export = tegra_sha_export, + .import = tegra_sha_import, + .setkey = tegra_hmac_setkey, + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.statesize = sizeof(struct tegra_sha_reqctx), + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "tegra-se-hmac-sha512", + .cra_priority = 300, + .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct tegra_sha_ctx), + .cra_alignmask = 0, + .cra_module = THIS_MODULE, + .cra_init = tegra_sha_cra_init, + .cra_exit = tegra_sha_cra_exit, + } + } + } +}; + +static int tegra_hash_kac_manifest(u32 user, u32 alg, u32 keylen) +{ + int manifest; + + manifest = SE_KAC_USER_NS; + + 
switch (alg) { + case SE_ALG_HMAC_SHA224: + case SE_ALG_HMAC_SHA256: + case SE_ALG_HMAC_SHA384: + case SE_ALG_HMAC_SHA512: + manifest |= SE_KAC_HMAC; + break; + default: + return -EINVAL; + } + + switch (keylen) { + case AES_KEYSIZE_128: + manifest |= SE_KAC_SIZE_128; + break; + case AES_KEYSIZE_192: + manifest |= SE_KAC_SIZE_192; + break; + case AES_KEYSIZE_256: + default: + manifest |= SE_KAC_SIZE_256; + break; + } + + return manifest; +} + +int tegra_init_hash(struct tegra_se *se) +{ + struct ahash_engine_alg *alg; + int i, ret; + + se->manifest = tegra_hash_kac_manifest; + + for (i = 0; i < ARRAY_SIZE(tegra_hash_algs); i++) { + tegra_hash_algs[i].se_dev = se; + alg = &tegra_hash_algs[i].alg.ahash; + + ret = crypto_engine_register_ahash(alg); + if (ret) { + dev_err(se->dev, "failed to register %s\n", + alg->base.halg.base.cra_name); + goto sha_err; + } + } + + return 0; + +sha_err: + while (i--) + crypto_engine_unregister_ahash(&tegra_hash_algs[i].alg.ahash); + + return ret; +} + +void tegra_deinit_hash(struct tegra_se *se) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tegra_hash_algs); i++) + crypto_engine_unregister_ahash(&tegra_hash_algs[i].alg.ahash); +} diff --git a/drivers/crypto/tegra/tegra-se-key.c b/drivers/crypto/tegra/tegra-se-key.c new file mode 100644 index 0000000000..ac14678dbd --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-key.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver file to manage keys of NVIDIA Security Engine. 
+ */ + +#include <linux/bitops.h> +#include <linux/module.h> +#include <crypto/aes.h> + +#include "tegra-se.h" + +#define SE_KEY_FULL_MASK GENMASK(SE_MAX_KEYSLOT, 0) + +/* Reserve keyslot 0, 14, 15 */ +#define SE_KEY_RSVD_MASK (BIT(0) | BIT(14) | BIT(15)) +#define SE_KEY_VALID_MASK (SE_KEY_FULL_MASK & ~SE_KEY_RSVD_MASK) + +/* Mutex lock to guard keyslots */ +static DEFINE_MUTEX(kslt_lock); + +/* Keyslot bitmask (0 = available, 1 = in use/not available) */ +static u16 tegra_se_keyslots = SE_KEY_RSVD_MASK; + +static u16 tegra_keyslot_alloc(void) +{ + u16 keyid; + + mutex_lock(&kslt_lock); + /* Check if all key slots are full */ + if (tegra_se_keyslots == GENMASK(SE_MAX_KEYSLOT, 0)) { + mutex_unlock(&kslt_lock); + return 0; + } + + keyid = ffz(tegra_se_keyslots); + tegra_se_keyslots |= BIT(keyid); + + mutex_unlock(&kslt_lock); + + return keyid; +} + +static void tegra_keyslot_free(u16 slot) +{ + mutex_lock(&kslt_lock); + tegra_se_keyslots &= ~(BIT(slot)); + mutex_unlock(&kslt_lock); +} + +static unsigned int tegra_key_prep_ins_cmd(struct tegra_se *se, u32 *cpuvaddr, + const u32 *key, u32 keylen, u16 slot, u32 alg) +{ + int i = 0, j; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->op); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_DUMMY; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->manifest); + cpuvaddr[i++] = se->manifest(se->owner, alg, keylen); + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_dst); + + cpuvaddr[i++] = SE_AES_KEY_DST_INDEX(slot); + + for (j = 0; j < keylen / 4; j++) { + /* Set key address */ + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_addr); + cpuvaddr[i++] = j; + + /* Set key data */ + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->key_data); + cpuvaddr[i++] = 
key[j]; + } + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->config); + cpuvaddr[i++] = SE_CFG_INS; + + cpuvaddr[i++] = host1x_opcode_setpayload(1); + cpuvaddr[i++] = se_host1x_opcode_incr_w(se->hw->regs->op); + cpuvaddr[i++] = SE_AES_OP_WRSTALL | SE_AES_OP_START | + SE_AES_OP_LASTBUF; + + cpuvaddr[i++] = se_host1x_opcode_nonincr(host1x_uclass_incr_syncpt_r(), 1); + cpuvaddr[i++] = host1x_uclass_incr_syncpt_cond_f(1) | + host1x_uclass_incr_syncpt_indx_f(se->syncpt_id); + + dev_dbg(se->dev, "key-slot %u key-manifest %#x\n", + slot, se->manifest(se->owner, alg, keylen)); + + return i; +} + +static bool tegra_key_in_kslt(u32 keyid) +{ + bool ret; + + if (keyid > SE_MAX_KEYSLOT) + return false; + + mutex_lock(&kslt_lock); + ret = ((BIT(keyid) & SE_KEY_VALID_MASK) && + (BIT(keyid) & tegra_se_keyslots)); + mutex_unlock(&kslt_lock); + + return ret; +} + +static int tegra_key_insert(struct tegra_se *se, const u8 *key, + u32 keylen, u16 slot, u32 alg) +{ + const u32 *keyval = (u32 *)key; + u32 *addr = se->cmdbuf->addr, size; + + size = tegra_key_prep_ins_cmd(se, addr, keyval, keylen, slot, alg); + + return tegra_se_host1x_submit(se, size); +} + +void tegra_key_invalidate(struct tegra_se *se, u32 keyid, u32 alg) +{ + u8 zkey[AES_MAX_KEY_SIZE] = {0}; + + if (!keyid) + return; + + /* Overwrite the key with 0s */ + tegra_key_insert(se, zkey, AES_MAX_KEY_SIZE, keyid, alg); + + tegra_keyslot_free(keyid); +} + +int tegra_key_submit(struct tegra_se *se, const u8 *key, u32 keylen, u32 alg, u32 *keyid) +{ + int ret; + + /* Use the existing slot if it is already allocated */ + if (!tegra_key_in_kslt(*keyid)) { + *keyid = tegra_keyslot_alloc(); + if (!(*keyid)) { + dev_err(se->dev, "failed to allocate key slot\n"); + return -ENOMEM; + } + } + + ret = tegra_key_insert(se, key, keylen, *keyid, alg); + if (ret) + return ret; + + return 0; +} diff --git a/drivers/crypto/tegra/tegra-se-main.c b/drivers/crypto/tegra/tegra-se-main.c new 
file mode 100644 index 0000000000..f94c0331b1 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se-main.c @@ -0,0 +1,436 @@ +// SPDX-License-Identifier: GPL-2.0-only +// SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +/* + * Crypto driver for NVIDIA Security Engine in Tegra Chips + */ + +#include <linux/clk.h> +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/mod_devicetable.h> + +#include <crypto/engine.h> + +#include "tegra-se.h" + +static struct host1x_bo *tegra_se_cmdbuf_get(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + kref_get(&cmdbuf->ref); + + return host_bo; +} + +static void tegra_se_cmdbuf_release(struct kref *ref) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(ref, struct tegra_se_cmdbuf, ref); + + dma_free_attrs(cmdbuf->dev, cmdbuf->size, cmdbuf->addr, + cmdbuf->iova, 0); + + kfree(cmdbuf); +} + +static void tegra_se_cmdbuf_put(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + kref_put(&cmdbuf->ref, tegra_se_cmdbuf_release); +} + +static struct host1x_bo_mapping * +tegra_se_cmdbuf_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(bo, struct tegra_se_cmdbuf, bo); + struct host1x_bo_mapping *map; + int err; + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (!map) + return ERR_PTR(-ENOMEM); + + kref_init(&map->ref); + map->bo = host1x_bo_get(bo); + map->direction = direction; + map->dev = dev; + + map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL); + if (!map->sgt) { + err = -ENOMEM; + goto free; + } + + err = dma_get_sgtable(dev, map->sgt, cmdbuf->addr, + cmdbuf->iova, cmdbuf->words * 4); + if (err) + goto free_sgt; + + err = dma_map_sgtable(dev, map->sgt, direction, 0); + if (err) + goto free_sgt; + + map->phys = 
sg_dma_address(map->sgt->sgl); + map->size = cmdbuf->words * 4; + map->chunks = err; + + return map; + +free_sgt: + sg_free_table(map->sgt); + kfree(map->sgt); +free: + kfree(map); + return ERR_PTR(err); +} + +static void tegra_se_cmdbuf_unpin(struct host1x_bo_mapping *map) +{ + if (!map) + return; + + dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0); + sg_free_table(map->sgt); + kfree(map->sgt); + host1x_bo_put(map->bo); + + kfree(map); +} + +static void *tegra_se_cmdbuf_mmap(struct host1x_bo *host_bo) +{ + struct tegra_se_cmdbuf *cmdbuf = container_of(host_bo, struct tegra_se_cmdbuf, bo); + + return cmdbuf->addr; +} + +static void tegra_se_cmdbuf_munmap(struct host1x_bo *host_bo, void *addr) +{ +} + +static const struct host1x_bo_ops tegra_se_cmdbuf_ops = { + .get = tegra_se_cmdbuf_get, + .put = tegra_se_cmdbuf_put, + .pin = tegra_se_cmdbuf_pin, + .unpin = tegra_se_cmdbuf_unpin, + .mmap = tegra_se_cmdbuf_mmap, + .munmap = tegra_se_cmdbuf_munmap, +}; + +static struct tegra_se_cmdbuf *tegra_se_host1x_bo_alloc(struct tegra_se *se, ssize_t size) +{ + struct tegra_se_cmdbuf *cmdbuf; + struct device *dev = se->dev->parent; + + cmdbuf = kzalloc(sizeof(*cmdbuf), GFP_KERNEL); + if (!cmdbuf) + return NULL; + + cmdbuf->addr = dma_alloc_attrs(dev, size, &cmdbuf->iova, + GFP_KERNEL, 0); + if (!cmdbuf->addr) + return NULL; + + cmdbuf->size = size; + cmdbuf->dev = dev; + + host1x_bo_init(&cmdbuf->bo, &tegra_se_cmdbuf_ops); + kref_init(&cmdbuf->ref); + + return cmdbuf; +} + +int tegra_se_host1x_submit(struct tegra_se *se, u32 size) +{ + struct host1x_job *job; + int ret; + + job = host1x_job_alloc(se->channel, 1, 0, true); + if (!job) { + dev_err(se->dev, "failed to allocate host1x job\n"); + return -ENOMEM; + } + + job->syncpt = host1x_syncpt_get(se->syncpt); + job->syncpt_incrs = 1; + job->client = &se->client; + job->class = se->client.class; + job->serialize = true; + job->engine_fallback_streamid = se->stream_id; + job->engine_streamid_offset = SE_STREAM_ID; + + 
se->cmdbuf->words = size; + + host1x_job_add_gather(job, &se->cmdbuf->bo, size, 0); + + ret = host1x_job_pin(job, se->dev); + if (ret) { + dev_err(se->dev, "failed to pin host1x job\n"); + goto job_put; + } + + ret = host1x_job_submit(job); + if (ret) { + dev_err(se->dev, "failed to submit host1x job\n"); + goto job_unpin; + } + + ret = host1x_syncpt_wait(job->syncpt, job->syncpt_end, + MAX_SCHEDULE_TIMEOUT, NULL); + if (ret) { + dev_err(se->dev, "host1x job timed out\n"); + return ret; + } + + host1x_job_put(job); + return 0; + +job_unpin: + host1x_job_unpin(job); +job_put: + host1x_job_put(job); + + return ret; +} + +static int tegra_se_client_init(struct host1x_client *client) +{ + struct tegra_se *se = container_of(client, struct tegra_se, client); + int ret; + + se->channel = host1x_channel_request(&se->client); + if (!se->channel) { + dev_err(se->dev, "host1x channel map failed\n"); + return -ENODEV; + } + + se->syncpt = host1x_syncpt_request(&se->client, 0); + if (!se->syncpt) { + dev_err(se->dev, "host1x syncpt allocation failed\n"); + ret = -EINVAL; + goto channel_put; + } + + se->syncpt_id = host1x_syncpt_id(se->syncpt); + + se->cmdbuf = tegra_se_host1x_bo_alloc(se, SZ_4K); + if (!se->cmdbuf) { + ret = -ENOMEM; + goto syncpt_put; + } + + ret = se->hw->init_alg(se); + if (ret) { + dev_err(se->dev, "failed to register algorithms\n"); + goto cmdbuf_put; + } + + return 0; + +cmdbuf_put: + tegra_se_cmdbuf_put(&se->cmdbuf->bo); +syncpt_put: + host1x_syncpt_put(se->syncpt); +channel_put: + host1x_channel_put(se->channel); + + return ret; +} + +static int tegra_se_client_deinit(struct host1x_client *client) +{ + struct tegra_se *se = container_of(client, struct tegra_se, client); + + se->hw->deinit_alg(se); + tegra_se_cmdbuf_put(&se->cmdbuf->bo); + host1x_syncpt_put(se->syncpt); + host1x_channel_put(se->channel); + + return 0; +} + +static const struct host1x_client_ops tegra_se_client_ops = { + .init = tegra_se_client_init, + .exit = tegra_se_client_deinit, +}; 
+ +static int tegra_se_host1x_register(struct tegra_se *se) +{ + INIT_LIST_HEAD(&se->client.list); + se->client.dev = se->dev; + se->client.ops = &tegra_se_client_ops; + se->client.class = se->hw->host1x_class; + se->client.num_syncpts = 1; + + host1x_client_register(&se->client); + + return 0; +} + +static int tegra_se_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct tegra_se *se; + int ret; + + se = devm_kzalloc(dev, sizeof(*se), GFP_KERNEL); + if (!se) + return -ENOMEM; + + se->dev = dev; + se->owner = TEGRA_GPSE_ID; + se->hw = device_get_match_data(&pdev->dev); + + se->base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(se->base)) + return PTR_ERR(se->base); + + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(39)); + platform_set_drvdata(pdev, se); + + se->clk = devm_clk_get_enabled(se->dev, NULL); + if (IS_ERR(se->clk)) + return dev_err_probe(dev, PTR_ERR(se->clk), + "failed to enable clocks\n"); + + if (!tegra_dev_iommu_get_stream_id(dev, &se->stream_id)) + return dev_err_probe(dev, -ENODEV, + "failed to get IOMMU stream ID\n"); + + writel(se->stream_id, se->base + SE_STREAM_ID); + + se->engine = crypto_engine_alloc_init(dev, 0); + if (!se->engine) + return dev_err_probe(dev, -ENOMEM, "failed to init crypto engine\n"); + + ret = crypto_engine_start(se->engine); + if (ret) { + crypto_engine_exit(se->engine); + return dev_err_probe(dev, ret, "failed to start crypto engine\n"); + } + + ret = tegra_se_host1x_register(se); + if (ret) { + crypto_engine_stop(se->engine); + crypto_engine_exit(se->engine); + return dev_err_probe(dev, ret, "failed to init host1x params\n"); + } + + return 0; +} + +static void tegra_se_remove(struct platform_device *pdev) +{ + struct tegra_se *se = platform_get_drvdata(pdev); + + crypto_engine_stop(se->engine); + crypto_engine_exit(se->engine); + host1x_client_unregister(&se->client); +} + +static const struct tegra_se_regs tegra234_aes1_regs = { + .config = SE_AES1_CFG, + .op = SE_AES1_OPERATION, 
+ .last_blk = SE_AES1_LAST_BLOCK, + .linear_ctr = SE_AES1_LINEAR_CTR, + .aad_len = SE_AES1_AAD_LEN, + .cryp_msg_len = SE_AES1_CRYPTO_MSG_LEN, + .manifest = SE_AES1_KEYMANIFEST, + .key_addr = SE_AES1_KEY_ADDR, + .key_data = SE_AES1_KEY_DATA, + .key_dst = SE_AES1_KEY_DST, + .result = SE_AES1_CMAC_RESULT, +}; + +static const struct tegra_se_regs tegra234_hash_regs = { + .config = SE_SHA_CFG, + .op = SE_SHA_OPERATION, + .manifest = SE_SHA_KEYMANIFEST, + .key_addr = SE_SHA_KEY_ADDR, + .key_data = SE_SHA_KEY_DATA, + .key_dst = SE_SHA_KEY_DST, + .result = SE_SHA_HASH_RESULT, +}; + +static const struct tegra_se_hw tegra234_aes_hw = { + .regs = &tegra234_aes1_regs, + .kac_ver = 1, + .host1x_class = 0x3b, + .init_alg = tegra_init_aes, + .deinit_alg = tegra_deinit_aes, +}; + +static const struct tegra_se_hw tegra234_hash_hw = { + .regs = &tegra234_hash_regs, + .kac_ver = 1, + .host1x_class = 0x3d, + .init_alg = tegra_init_hash, + .deinit_alg = tegra_deinit_hash, +}; + +static const struct of_device_id tegra_se_of_match[] = { + { + .compatible = "nvidia,tegra234-se-aes", + .data = &tegra234_aes_hw + }, { + .compatible = "nvidia,tegra234-se-hash", + .data = &tegra234_hash_hw, + }, + { }, +}; +MODULE_DEVICE_TABLE(of, tegra_se_of_match); + +static struct platform_driver tegra_se_driver = { + .driver = { + .name = "tegra-se", + .of_match_table = tegra_se_of_match, + }, + .probe = tegra_se_probe, + .remove_new = tegra_se_remove, +}; + +static int tegra_se_host1x_probe(struct host1x_device *dev) +{ + return host1x_device_init(dev); +} + +static int tegra_se_host1x_remove(struct host1x_device *dev) +{ + host1x_device_exit(dev); + + return 0; +} + +static struct host1x_driver tegra_se_host1x_driver = { + .driver = { + .name = "tegra-se-host1x", + }, + .probe = tegra_se_host1x_probe, + .remove = tegra_se_host1x_remove, + .subdevs = tegra_se_of_match, +}; + +static int __init tegra_se_module_init(void) +{ + int ret; + + ret = host1x_driver_register(&tegra_se_host1x_driver); + if (ret) + 
return ret; + + return platform_driver_register(&tegra_se_driver); +} + +static void __exit tegra_se_module_exit(void) +{ + host1x_driver_unregister(&tegra_se_host1x_driver); + platform_driver_unregister(&tegra_se_driver); +} + +module_init(tegra_se_module_init); +module_exit(tegra_se_module_exit); + +MODULE_DESCRIPTION("NVIDIA Tegra Security Engine Driver"); +MODULE_AUTHOR("Akhil R <akhilrajeev@nvidia.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/tegra/tegra-se.h b/drivers/crypto/tegra/tegra-se.h new file mode 100644 index 0000000000..b9dd7ceb87 --- /dev/null +++ b/drivers/crypto/tegra/tegra-se.h @@ -0,0 +1,560 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * + * Header file for NVIDIA Security Engine driver. + */ + +#ifndef _TEGRA_SE_H +#define _TEGRA_SE_H + +#include <linux/bitfield.h> +#include <linux/iommu.h> +#include <linux/host1x.h> +#include <crypto/aead.h> +#include <crypto/engine.h> +#include <crypto/hash.h> +#include <crypto/sha1.h> +#include <crypto/sha3.h> +#include <crypto/skcipher.h> + +#define SE_OWNERSHIP 0x14 +#define SE_OWNERSHIP_UID(x) FIELD_GET(GENMASK(7, 0), x) +#define TEGRA_GPSE_ID 3 + +#define SE_STREAM_ID 0x90 + +#define SE_SHA_CFG 0x4004 +#define SE_SHA_KEY_ADDR 0x4094 +#define SE_SHA_KEY_DATA 0x4098 +#define SE_SHA_KEYMANIFEST 0x409c +#define SE_SHA_CRYPTO_CFG 0x40a4 +#define SE_SHA_KEY_DST 0x40a8 +#define SE_SHA_SRC_KSLT 0x4180 +#define SE_SHA_TGT_KSLT 0x4184 +#define SE_SHA_MSG_LENGTH 0x401c +#define SE_SHA_OPERATION 0x407c +#define SE_SHA_HASH_RESULT 0x40b0 + +#define SE_SHA_ENC_MODE(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_SHA_ENC_MODE_SHA1 SE_SHA_ENC_MODE(0) +#define SE_SHA_ENC_MODE_SHA224 SE_SHA_ENC_MODE(4) +#define SE_SHA_ENC_MODE_SHA256 SE_SHA_ENC_MODE(5) +#define SE_SHA_ENC_MODE_SHA384 SE_SHA_ENC_MODE(6) +#define SE_SHA_ENC_MODE_SHA512 SE_SHA_ENC_MODE(7) +#define SE_SHA_ENC_MODE_SHA_CTX_INTEGRITY 
SE_SHA_ENC_MODE(8) +#define SE_SHA_ENC_MODE_SHA3_224 SE_SHA_ENC_MODE(9) +#define SE_SHA_ENC_MODE_SHA3_256 SE_SHA_ENC_MODE(10) +#define SE_SHA_ENC_MODE_SHA3_384 SE_SHA_ENC_MODE(11) +#define SE_SHA_ENC_MODE_SHA3_512 SE_SHA_ENC_MODE(12) +#define SE_SHA_ENC_MODE_SHAKE128 SE_SHA_ENC_MODE(13) +#define SE_SHA_ENC_MODE_SHAKE256 SE_SHA_ENC_MODE(14) +#define SE_SHA_ENC_MODE_HMAC_SHA256_1KEY SE_SHA_ENC_MODE(0) +#define SE_SHA_ENC_MODE_HMAC_SHA256_2KEY SE_SHA_ENC_MODE(1) +#define SE_SHA_ENC_MODE_SM3_256 SE_SHA_ENC_MODE(0) + +#define SE_SHA_CFG_ENC_ALG(x) FIELD_PREP(GENMASK(15, 12), x) +#define SE_SHA_ENC_ALG_NOP SE_SHA_CFG_ENC_ALG(0) +#define SE_SHA_ENC_ALG_SHA_ENC SE_SHA_CFG_ENC_ALG(1) +#define SE_SHA_ENC_ALG_RNG SE_SHA_CFG_ENC_ALG(2) +#define SE_SHA_ENC_ALG_SHA SE_SHA_CFG_ENC_ALG(3) +#define SE_SHA_ENC_ALG_SM3 SE_SHA_CFG_ENC_ALG(4) +#define SE_SHA_ENC_ALG_HMAC SE_SHA_CFG_ENC_ALG(7) +#define SE_SHA_ENC_ALG_KDF SE_SHA_CFG_ENC_ALG(8) +#define SE_SHA_ENC_ALG_KEY_INVLD SE_SHA_CFG_ENC_ALG(10) +#define SE_SHA_ENC_ALG_KEY_INQUIRE SE_SHA_CFG_ENC_ALG(12) +#define SE_SHA_ENC_ALG_INS SE_SHA_CFG_ENC_ALG(13) + +#define SE_SHA_OP_LASTBUF FIELD_PREP(BIT(16), 1) +#define SE_SHA_OP_WRSTALL FIELD_PREP(BIT(15), 1) + +#define SE_SHA_OP_OP(x) FIELD_PREP(GENMASK(2, 0), x) +#define SE_SHA_OP_START SE_SHA_OP_OP(1) +#define SE_SHA_OP_RESTART_OUT SE_SHA_OP_OP(2) +#define SE_SHA_OP_RESTART_IN SE_SHA_OP_OP(4) +#define SE_SHA_OP_RESTART_INOUT SE_SHA_OP_OP(5) +#define SE_SHA_OP_DUMMY SE_SHA_OP_OP(6) + +#define SE_SHA_CFG_DEC_ALG(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_SHA_DEC_ALG_NOP SE_SHA_CFG_DEC_ALG(0) +#define SE_SHA_DEC_ALG_AES_DEC SE_SHA_CFG_DEC_ALG(1) +#define SE_SHA_DEC_ALG_HMAC SE_SHA_CFG_DEC_ALG(7) +#define SE_SHA_DEC_ALG_HMAC_VERIFY SE_SHA_CFG_DEC_ALG(9) + +#define SE_SHA_CFG_DST(x) FIELD_PREP(GENMASK(4, 2), x) +#define SE_SHA_DST_MEMORY SE_SHA_CFG_DST(0) +#define SE_SHA_DST_HASH_REG SE_SHA_CFG_DST(1) +#define SE_SHA_DST_KEYTABLE SE_SHA_CFG_DST(2) +#define SE_SHA_DST_SRK SE_SHA_CFG_DST(3) 
+ +#define SE_SHA_TASK_HASH_INIT BIT(0) + +/* AES Configuration */ +#define SE_AES0_CFG 0x1004 +#define SE_AES0_CRYPTO_CONFIG 0x1008 +#define SE_AES0_KEY_DST 0x1030 +#define SE_AES0_OPERATION 0x1038 +#define SE_AES0_LINEAR_CTR 0x101c +#define SE_AES0_LAST_BLOCK 0x102c +#define SE_AES0_KEY_ADDR 0x10bc +#define SE_AES0_KEY_DATA 0x10c0 +#define SE_AES0_CMAC_RESULT 0x10c4 +#define SE_AES0_SRC_KSLT 0x1100 +#define SE_AES0_TGT_KSLT 0x1104 +#define SE_AES0_KEYMANIFEST 0x1114 +#define SE_AES0_AAD_LEN 0x112c +#define SE_AES0_CRYPTO_MSG_LEN 0x1134 + +#define SE_AES1_CFG 0x2004 +#define SE_AES1_CRYPTO_CONFIG 0x2008 +#define SE_AES1_KEY_DST 0x2030 +#define SE_AES1_OPERATION 0x2038 +#define SE_AES1_LINEAR_CTR 0x201c +#define SE_AES1_LAST_BLOCK 0x202c +#define SE_AES1_KEY_ADDR 0x20bc +#define SE_AES1_KEY_DATA 0x20c0 +#define SE_AES1_CMAC_RESULT 0x20c4 +#define SE_AES1_SRC_KSLT 0x2100 +#define SE_AES1_TGT_KSLT 0x2104 +#define SE_AES1_KEYMANIFEST 0x2114 +#define SE_AES1_AAD_LEN 0x212c +#define SE_AES1_CRYPTO_MSG_LEN 0x2134 + +#define SE_AES_CFG_ENC_MODE(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_AES_ENC_MODE_GMAC SE_AES_CFG_ENC_MODE(3) +#define SE_AES_ENC_MODE_GCM SE_AES_CFG_ENC_MODE(4) +#define SE_AES_ENC_MODE_GCM_FINAL SE_AES_CFG_ENC_MODE(5) +#define SE_AES_ENC_MODE_CMAC SE_AES_CFG_ENC_MODE(7) +#define SE_AES_ENC_MODE_CBC_MAC SE_AES_CFG_ENC_MODE(12) + +#define SE_AES_CFG_DEC_MODE(x) FIELD_PREP(GENMASK(23, 16), x) +#define SE_AES_DEC_MODE_GMAC SE_AES_CFG_DEC_MODE(3) +#define SE_AES_DEC_MODE_GCM SE_AES_CFG_DEC_MODE(4) +#define SE_AES_DEC_MODE_GCM_FINAL SE_AES_CFG_DEC_MODE(5) +#define SE_AES_DEC_MODE_CBC_MAC SE_AES_CFG_DEC_MODE(12) + +#define SE_AES_CFG_ENC_ALG(x) FIELD_PREP(GENMASK(15, 12), x) +#define SE_AES_ENC_ALG_NOP SE_AES_CFG_ENC_ALG(0) +#define SE_AES_ENC_ALG_AES_ENC SE_AES_CFG_ENC_ALG(1) +#define SE_AES_ENC_ALG_RNG SE_AES_CFG_ENC_ALG(2) +#define SE_AES_ENC_ALG_SHA SE_AES_CFG_ENC_ALG(3) +#define SE_AES_ENC_ALG_HMAC SE_AES_CFG_ENC_ALG(7) +#define SE_AES_ENC_ALG_KDF 
SE_AES_CFG_ENC_ALG(8) +#define SE_AES_ENC_ALG_INS SE_AES_CFG_ENC_ALG(13) + +#define SE_AES_CFG_DEC_ALG(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_AES_DEC_ALG_NOP SE_AES_CFG_DEC_ALG(0) +#define SE_AES_DEC_ALG_AES_DEC SE_AES_CFG_DEC_ALG(1) + +#define SE_AES_CFG_DST(x) FIELD_PREP(GENMASK(4, 2), x) +#define SE_AES_DST_MEMORY SE_AES_CFG_DST(0) +#define SE_AES_DST_HASH_REG SE_AES_CFG_DST(1) +#define SE_AES_DST_KEYTABLE SE_AES_CFG_DST(2) +#define SE_AES_DST_SRK SE_AES_CFG_DST(3) + +/* AES Crypto Configuration */ +#define SE_AES_KEY2_INDEX(x) FIELD_PREP(GENMASK(31, 28), x) +#define SE_AES_KEY_INDEX(x) FIELD_PREP(GENMASK(27, 24), x) + +#define SE_AES_CRYPTO_CFG_SCC_DIS FIELD_PREP(BIT(20), 1) + +#define SE_AES_CRYPTO_CFG_CTR_CNTN(x) FIELD_PREP(GENMASK(18, 11), x) + +#define SE_AES_CRYPTO_CFG_IV_MODE(x) FIELD_PREP(BIT(10), x) +#define SE_AES_IV_MODE_SWIV SE_AES_CRYPTO_CFG_IV_MODE(0) +#define SE_AES_IV_MODE_HWIV SE_AES_CRYPTO_CFG_IV_MODE(1) + +#define SE_AES_CRYPTO_CFG_CORE_SEL(x) FIELD_PREP(BIT(9), x) +#define SE_AES_CORE_SEL_DECRYPT SE_AES_CRYPTO_CFG_CORE_SEL(0) +#define SE_AES_CORE_SEL_ENCRYPT SE_AES_CRYPTO_CFG_CORE_SEL(1) + +#define SE_AES_CRYPTO_CFG_IV_SEL(x) FIELD_PREP(GENMASK(8, 7), x) +#define SE_AES_IV_SEL_UPDATED SE_AES_CRYPTO_CFG_IV_SEL(1) +#define SE_AES_IV_SEL_REG SE_AES_CRYPTO_CFG_IV_SEL(2) +#define SE_AES_IV_SEL_RANDOM SE_AES_CRYPTO_CFG_IV_SEL(3) + +#define SE_AES_CRYPTO_CFG_VCTRAM_SEL(x) FIELD_PREP(GENMASK(6, 5), x) +#define SE_AES_VCTRAM_SEL_MEMORY SE_AES_CRYPTO_CFG_VCTRAM_SEL(0) +#define SE_AES_VCTRAM_SEL_TWEAK SE_AES_CRYPTO_CFG_VCTRAM_SEL(1) +#define SE_AES_VCTRAM_SEL_AESOUT SE_AES_CRYPTO_CFG_VCTRAM_SEL(2) +#define SE_AES_VCTRAM_SEL_PREV_MEM SE_AES_CRYPTO_CFG_VCTRAM_SEL(3) + +#define SE_AES_CRYPTO_CFG_INPUT_SEL(x) FIELD_PREP(GENMASK(4, 3), x) +#define SE_AES_INPUT_SEL_MEMORY SE_AES_CRYPTO_CFG_INPUT_SEL(0) +#define SE_AES_INPUT_SEL_RANDOM SE_AES_CRYPTO_CFG_INPUT_SEL(1) +#define SE_AES_INPUT_SEL_AESOUT SE_AES_CRYPTO_CFG_INPUT_SEL(2) +#define 
SE_AES_INPUT_SEL_LINEAR_CTR SE_AES_CRYPTO_CFG_INPUT_SEL(3) +#define SE_AES_INPUT_SEL_REG SE_AES_CRYPTO_CFG_INPUT_SEL(1) + +#define SE_AES_CRYPTO_CFG_XOR_POS(x) FIELD_PREP(GENMASK(2, 1), x) +#define SE_AES_XOR_POS_BYPASS SE_AES_CRYPTO_CFG_XOR_POS(0) +#define SE_AES_XOR_POS_BOTH SE_AES_CRYPTO_CFG_XOR_POS(1) +#define SE_AES_XOR_POS_TOP SE_AES_CRYPTO_CFG_XOR_POS(2) +#define SE_AES_XOR_POS_BOTTOM SE_AES_CRYPTO_CFG_XOR_POS(3) + +#define SE_AES_CRYPTO_CFG_HASH_EN(x) FIELD_PREP(BIT(0), x) +#define SE_AES_HASH_DISABLE SE_AES_CRYPTO_CFG_HASH_EN(0) +#define SE_AES_HASH_ENABLE SE_AES_CRYPTO_CFG_HASH_EN(1) + +#define SE_LAST_BLOCK_VAL(x) FIELD_PREP(GENMASK(19, 0), x) +#define SE_LAST_BLOCK_RES_BITS(x) FIELD_PREP(GENMASK(26, 20), x) + +#define SE_AES_OP_LASTBUF FIELD_PREP(BIT(16), 1) +#define SE_AES_OP_WRSTALL FIELD_PREP(BIT(15), 1) +#define SE_AES_OP_FINAL FIELD_PREP(BIT(5), 1) +#define SE_AES_OP_INIT FIELD_PREP(BIT(4), 1) + +#define SE_AES_OP_OP(x) FIELD_PREP(GENMASK(2, 0), x) +#define SE_AES_OP_START SE_AES_OP_OP(1) +#define SE_AES_OP_RESTART_OUT SE_AES_OP_OP(2) +#define SE_AES_OP_RESTART_IN SE_AES_OP_OP(4) +#define SE_AES_OP_RESTART_INOUT SE_AES_OP_OP(5) +#define SE_AES_OP_DUMMY SE_AES_OP_OP(6) + +#define SE_KAC_SIZE(x) FIELD_PREP(GENMASK(15, 14), x) +#define SE_KAC_SIZE_128 SE_KAC_SIZE(0) +#define SE_KAC_SIZE_192 SE_KAC_SIZE(1) +#define SE_KAC_SIZE_256 SE_KAC_SIZE(2) + +#define SE_KAC_EXPORTABLE FIELD_PREP(BIT(12), 1) + +#define SE_KAC_PURPOSE(x) FIELD_PREP(GENMASK(11, 8), x) +#define SE_KAC_ENC SE_KAC_PURPOSE(0) +#define SE_KAC_CMAC SE_KAC_PURPOSE(1) +#define SE_KAC_HMAC SE_KAC_PURPOSE(2) +#define SE_KAC_GCM_KW SE_KAC_PURPOSE(3) +#define SE_KAC_HMAC_KDK SE_KAC_PURPOSE(6) +#define SE_KAC_HMAC_KDD SE_KAC_PURPOSE(7) +#define SE_KAC_HMAC_KDD_KUW SE_KAC_PURPOSE(8) +#define SE_KAC_XTS SE_KAC_PURPOSE(9) +#define SE_KAC_GCM SE_KAC_PURPOSE(10) + +#define SE_KAC_USER_NS FIELD_PREP(GENMASK(6, 4), 3) + +#define SE_AES_KEY_DST_INDEX(x) FIELD_PREP(GENMASK(11, 8), x) +#define 
SE_ADDR_HI_MSB(x) FIELD_PREP(GENMASK(31, 24), x) +#define SE_ADDR_HI_SZ(x) FIELD_PREP(GENMASK(23, 0), x) + +#define SE_CFG_AES_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_AES_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GMAC_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GMAC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GMAC_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GMAC | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GCM | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GCM | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_FINAL_ENCRYPT (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_DEC_ALG_NOP | \ + SE_AES_ENC_MODE_GCM_FINAL | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_GCM_FINAL_DECRYPT (SE_AES_ENC_ALG_NOP | \ + SE_AES_DEC_ALG_AES_DEC | \ + SE_AES_DEC_MODE_GCM_FINAL | \ + SE_AES_DST_MEMORY) + +#define SE_CFG_CMAC (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_ENC_MODE_CMAC | \ + SE_AES_DST_HASH_REG) + +#define SE_CFG_CBC_MAC (SE_AES_ENC_ALG_AES_ENC | \ + SE_AES_ENC_MODE_CBC_MAC) + +#define SE_CFG_INS (SE_AES_ENC_ALG_INS | \ + SE_AES_DEC_ALG_NOP) + +#define SE_CRYPTO_CFG_ECB_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_XOR_POS_BYPASS | \ + SE_AES_CORE_SEL_ENCRYPT) + +#define SE_CRYPTO_CFG_ECB_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_XOR_POS_BYPASS | \ + SE_AES_CORE_SEL_DECRYPT) + +#define SE_CRYPTO_CFG_CBC_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_AESOUT | \ + SE_AES_XOR_POS_TOP | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CBC_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_PREV_MEM | \ + SE_AES_XOR_POS_BOTTOM | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CTR 
(SE_AES_INPUT_SEL_LINEAR_CTR | \ + SE_AES_VCTRAM_SEL_MEMORY | \ + SE_AES_XOR_POS_BOTTOM | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_CRYPTO_CFG_CTR_CNTN(1) | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_ENCRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_XTS_DECRYPT (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_TWEAK | \ + SE_AES_XOR_POS_BOTH | \ + SE_AES_CORE_SEL_DECRYPT | \ + SE_AES_IV_SEL_REG) + +#define SE_CRYPTO_CFG_CBC_MAC (SE_AES_INPUT_SEL_MEMORY | \ + SE_AES_VCTRAM_SEL_AESOUT | \ + SE_AES_XOR_POS_TOP | \ + SE_AES_CORE_SEL_ENCRYPT | \ + SE_AES_HASH_ENABLE | \ + SE_AES_IV_SEL_REG) + +#define HASH_RESULT_REG_COUNT 50 +#define CMAC_RESULT_REG_COUNT 4 + +#define SE_CRYPTO_CTR_REG_COUNT 4 +#define SE_MAX_KEYSLOT 15 +#define SE_MAX_MEM_ALLOC SZ_4M +#define SE_AES_BUFLEN 0x8000 +#define SE_SHA_BUFLEN 0x2000 + +#define SHA_FIRST BIT(0) +#define SHA_UPDATE BIT(1) +#define SHA_FINAL BIT(2) + +/* Security Engine operation modes */ +enum se_aes_alg { + SE_ALG_CBC, /* Cipher Block Chaining (CBC) mode */ + SE_ALG_ECB, /* Electronic Codebook (ECB) mode */ + SE_ALG_CTR, /* Counter (CTR) mode */ + SE_ALG_XTS, /* XTS mode */ + SE_ALG_GMAC, /* GMAC mode */ + SE_ALG_GCM, /* GCM mode */ + SE_ALG_GCM_FINAL, /* GCM FINAL mode */ + SE_ALG_CMAC, /* Cipher-based MAC (CMAC) mode */ + SE_ALG_CBC_MAC, /* CBC MAC mode */ +}; + +enum se_hash_alg { + SE_ALG_RNG_DRBG, /* Deterministic Random Bit Generator */ + SE_ALG_SHA1, /* Secure Hash Algorithm-1 (SHA1) mode */ + SE_ALG_SHA224, /* Secure Hash Algorithm-224 (SHA224) mode */ + SE_ALG_SHA256, /* Secure Hash Algorithm-256 (SHA256) mode */ + SE_ALG_SHA384, /* Secure Hash Algorithm-384 (SHA384) mode */ + SE_ALG_SHA512, /* Secure Hash Algorithm-512 
(SHA512) mode */ + SE_ALG_SHA3_224, /* Secure Hash Algorithm3-224 (SHA3-224) mode */ + SE_ALG_SHA3_256, /* Secure Hash Algorithm3-256 (SHA3-256) mode */ + SE_ALG_SHA3_384, /* Secure Hash Algorithm3-384 (SHA3-384) mode */ + SE_ALG_SHA3_512, /* Secure Hash Algorithm3-512 (SHA3-512) mode */ + SE_ALG_SHAKE128, /* Secure Hash Algorithm3 (SHAKE128) mode */ + SE_ALG_SHAKE256, /* Secure Hash Algorithm3 (SHAKE256) mode */ + SE_ALG_HMAC_SHA224, /* Hash based MAC (HMAC) - 224 */ + SE_ALG_HMAC_SHA256, /* Hash based MAC (HMAC) - 256 */ + SE_ALG_HMAC_SHA384, /* Hash based MAC (HMAC) - 384 */ + SE_ALG_HMAC_SHA512, /* Hash based MAC (HMAC) - 512 */ +}; + +struct tegra_se_alg { + struct tegra_se *se_dev; + const char *alg_base; + + union { + struct skcipher_engine_alg skcipher; + struct aead_engine_alg aead; + struct ahash_engine_alg ahash; + } alg; +}; + +struct tegra_se_regs { + u32 op; + u32 config; + u32 last_blk; + u32 linear_ctr; + u32 out_addr; + u32 aad_len; + u32 cryp_msg_len; + u32 manifest; + u32 key_addr; + u32 key_data; + u32 key_dst; + u32 result; +}; + +struct tegra_se_hw { + const struct tegra_se_regs *regs; + int (*init_alg)(struct tegra_se *se); + void (*deinit_alg)(struct tegra_se *se); + bool support_sm_alg; + u32 host1x_class; + u32 kac_ver; +}; + +struct tegra_se { + int (*manifest)(u32 user, u32 alg, u32 keylen); + const struct tegra_se_hw *hw; + struct host1x_client client; + struct host1x_channel *channel; + struct tegra_se_cmdbuf *cmdbuf; + struct crypto_engine *engine; + struct host1x_syncpt *syncpt; + struct device *dev; + struct clk *clk; + unsigned int opcode_addr; + unsigned int stream_id; + unsigned int syncpt_id; + void __iomem *base; + u32 owner; +}; + +struct tegra_se_cmdbuf { + dma_addr_t iova; + u32 *addr; + struct device *dev; + struct kref ref; + struct host1x_bo bo; + ssize_t size; + u32 words; +}; + +struct tegra_se_datbuf { + u8 *buf; + dma_addr_t addr; + ssize_t size; +}; + +static inline int se_algname_to_algid(const char *name) +{ + if 
(!strcmp(name, "cbc(aes)")) + return SE_ALG_CBC; + else if (!strcmp(name, "ecb(aes)")) + return SE_ALG_ECB; + else if (!strcmp(name, "ctr(aes)")) + return SE_ALG_CTR; + else if (!strcmp(name, "xts(aes)")) + return SE_ALG_XTS; + else if (!strcmp(name, "cmac(aes)")) + return SE_ALG_CMAC; + else if (!strcmp(name, "gcm(aes)")) + return SE_ALG_GCM; + else if (!strcmp(name, "ccm(aes)")) + return SE_ALG_CBC_MAC; + + else if (!strcmp(name, "sha1")) + return SE_ALG_SHA1; + else if (!strcmp(name, "sha224")) + return SE_ALG_SHA224; + else if (!strcmp(name, "sha256")) + return SE_ALG_SHA256; + else if (!strcmp(name, "sha384")) + return SE_ALG_SHA384; + else if (!strcmp(name, "sha512")) + return SE_ALG_SHA512; + else if (!strcmp(name, "sha3-224")) + return SE_ALG_SHA3_224; + else if (!strcmp(name, "sha3-256")) + return SE_ALG_SHA3_256; + else if (!strcmp(name, "sha3-384")) + return SE_ALG_SHA3_384; + else if (!strcmp(name, "sha3-512")) + return SE_ALG_SHA3_512; + else if (!strcmp(name, "hmac(sha224)")) + return SE_ALG_HMAC_SHA224; + else if (!strcmp(name, "hmac(sha256)")) + return SE_ALG_HMAC_SHA256; + else if (!strcmp(name, "hmac(sha384)")) + return SE_ALG_HMAC_SHA384; + else if (!strcmp(name, "hmac(sha512)")) + return SE_ALG_HMAC_SHA512; + else + return -EINVAL; +} + +/* Functions */ +int tegra_init_aes(struct tegra_se *se); +int tegra_init_hash(struct tegra_se *se); +void tegra_deinit_aes(struct tegra_se *se); +void tegra_deinit_hash(struct tegra_se *se); +int tegra_key_submit(struct tegra_se *se, const u8 *key, + u32 keylen, u32 alg, u32 *keyid); +void tegra_key_invalidate(struct tegra_se *se, u32 keyid, u32 alg); +int tegra_se_host1x_submit(struct tegra_se *se, u32 size); + +/* HOST1x OPCODES */ +static inline u32 host1x_opcode_setpayload(unsigned int payload) +{ + return (9 << 28) | payload; +} + +static inline u32 host1x_opcode_incr_w(unsigned int offset) +{ + /* 22-bit offset supported */ + return (10 << 28) | offset; +} + +static inline u32 
host1x_opcode_nonincr_w(unsigned int offset) +{ + /* 22-bit offset supported */ + return (11 << 28) | offset; +} + +static inline u32 host1x_opcode_incr(unsigned int offset, unsigned int count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned int offset, unsigned int count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} + +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} + +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} + +#define se_host1x_opcode_incr_w(x) host1x_opcode_incr_w((x) / 4) +#define se_host1x_opcode_nonincr_w(x) host1x_opcode_nonincr_w((x) / 4) +#define se_host1x_opcode_incr(x, y) host1x_opcode_incr((x) / 4, y) +#define se_host1x_opcode_nonincr(x, y) host1x_opcode_nonincr((x) / 4, y) + +#endif /*_TEGRA_SE_H*/ diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index de53eddf67..cb92b7fa99 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -225,11 +225,11 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request struct virtio_crypto *vcrypto = ctx->vcrypto; struct virtio_crypto_op_data_req *req_data = vc_req->req_data; struct scatterlist *sgs[4], outhdr_sg, inhdr_sg, srcdata_sg, dstdata_sg; - void *src_buf = NULL, *dst_buf = NULL; + void *src_buf, *dst_buf = NULL; unsigned int num_out = 0, num_in = 0; int node = dev_to_node(&vcrypto->vdev->dev); unsigned long flags; - int ret = -ENOMEM; + int ret; bool verify = vc_akcipher_req->opcode == VIRTIO_CRYPTO_AKCIPHER_VERIFY; unsigned int src_len = verify ? 
req->src_len + req->dst_len : req->src_len; @@ -240,7 +240,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request /* src data */ src_buf = kcalloc_node(src_len, 1, GFP_KERNEL, node); if (!src_buf) - goto err; + return -ENOMEM; if (verify) { /* for verify operation, both src and dst data work as OUT direction */ @@ -255,7 +255,7 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request /* dst data */ dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node); if (!dst_buf) - goto err; + goto free_src; sg_init_one(&dstdata_sg, dst_buf, req->dst_len); sgs[num_out + num_in++] = &dstdata_sg; @@ -278,9 +278,9 @@ static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request return 0; err: - kfree(src_buf); kfree(dst_buf); - +free_src: + kfree(src_buf); return -ENOMEM; } diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index b909c6a2bf..30cd040aa0 100644 --- a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -42,8 +42,6 @@ static void virtcrypto_ctrlq_callback(struct virtqueue *vq) virtio_crypto_ctrlq_callback(vc_ctrl_req); spin_lock_irqsave(&vcrypto->ctrl_lock, flags); } - if (unlikely(virtqueue_is_broken(vq))) - break; } while (!virtqueue_enable_cb(vq)); spin_unlock_irqrestore(&vcrypto->ctrl_lock, flags); } @@ -583,7 +581,6 @@ static const struct virtio_device_id id_table[] = { static struct virtio_driver virtio_crypto_driver = { .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, .feature_table = features, .feature_table_size = ARRAY_SIZE(features), .id_table = id_table, diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore deleted file mode 100644 index 7aa71d83f7..0000000000 --- a/drivers/crypto/vmx/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -aesp8-ppc.S -ghashp8-ppc.S diff --git a/drivers/crypto/vmx/Kconfig 
b/drivers/crypto/vmx/Kconfig deleted file mode 100644 index b2c28b87f1..0000000000 --- a/drivers/crypto/vmx/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -config CRYPTO_DEV_VMX_ENCRYPT - tristate "Encryption acceleration support on P8 CPU" - depends on CRYPTO_DEV_VMX - select CRYPTO_AES - select CRYPTO_CBC - select CRYPTO_CTR - select CRYPTO_GHASH - select CRYPTO_XTS - default m - help - Support for VMX cryptographic acceleration instructions on Power8 CPU. - This module supports acceleration for AES and GHASH in hardware. If you - choose 'M' here, this module will be called vmx-crypto. diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile deleted file mode 100644 index 7257b8c446..0000000000 --- a/drivers/crypto/vmx/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o -vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o - -ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) -override flavour := linux-ppc64le -else -ifdef CONFIG_PPC64_ELF_ABI_V2 -override flavour := linux-ppc64-elfv2 -else -override flavour := linux-ppc64 -endif -endif - -quiet_cmd_perl = PERL $@ - cmd_perl = $(PERL) $< $(flavour) > $@ - -targets += aesp8-ppc.S ghashp8-ppc.S - -$(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE - $(call if_changed,perl) - -OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y diff --git a/drivers/crypto/vmx/aes.c b/drivers/crypto/vmx/aes.c deleted file mode 100644 index ec06189fbf..0000000000 --- a/drivers/crypto/vmx/aes.c +++ /dev/null @@ -1,134 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AES routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. 
- * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - */ - -#include <linux/types.h> -#include <linux/err.h> -#include <linux/crypto.h> -#include <linux/delay.h> -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/internal/cipher.h> -#include <crypto/internal/simd.h> - -#include "aesp8-ppc.h" - -struct p8_aes_ctx { - struct crypto_cipher *fallback; - struct aes_key enc_key; - struct aes_key dec_key; -}; - -static int p8_aes_init(struct crypto_tfm *tfm) -{ - const char *alg = crypto_tfm_alg_name(tfm); - struct crypto_cipher *fallback; - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - fallback = crypto_alloc_cipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(fallback)) { - printk(KERN_ERR - "Failed to allocate transformation for '%s': %ld\n", - alg, PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_cipher_set_flags(fallback, - crypto_cipher_get_flags((struct - crypto_cipher *) - tfm)); - ctx->fallback = fallback; - - return 0; -} - -static void p8_aes_exit(struct crypto_tfm *tfm) -{ - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (ctx->fallback) { - crypto_free_cipher(ctx->fallback); - ctx->fallback = NULL; - } -} - -static int p8_aes_setkey(struct crypto_tfm *tfm, const u8 *key, - unsigned int keylen) -{ - int ret; - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret |= crypto_cipher_setkey(ctx->fallback, key, keylen); - - return ret ? 
-EINVAL : 0; -} - -static void p8_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!crypto_simd_usable()) { - crypto_cipher_encrypt_one(ctx->fallback, dst, src); - } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_encrypt(src, dst, &ctx->enc_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - } -} - -static void p8_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) -{ - struct p8_aes_ctx *ctx = crypto_tfm_ctx(tfm); - - if (!crypto_simd_usable()) { - crypto_cipher_decrypt_one(ctx->fallback, dst, src); - } else { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_decrypt(src, dst, &ctx->dec_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - } -} - -struct crypto_alg p8_aes_alg = { - .cra_name = "aes", - .cra_driver_name = "p8_aes", - .cra_module = THIS_MODULE, - .cra_priority = 1000, - .cra_type = NULL, - .cra_flags = CRYPTO_ALG_TYPE_CIPHER | CRYPTO_ALG_NEED_FALLBACK, - .cra_alignmask = 0, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct p8_aes_ctx), - .cra_init = p8_aes_init, - .cra_exit = p8_aes_exit, - .cra_cipher = { - .cia_min_keysize = AES_MIN_KEY_SIZE, - .cia_max_keysize = AES_MAX_KEY_SIZE, - .cia_setkey = p8_aes_setkey, - .cia_encrypt = p8_aes_encrypt, - .cia_decrypt = p8_aes_decrypt, - }, -}; diff --git a/drivers/crypto/vmx/aes_cbc.c b/drivers/crypto/vmx/aes_cbc.c deleted file mode 100644 index ed0debc7ac..0000000000 --- a/drivers/crypto/vmx/aes_cbc.c +++ /dev/null @@ -1,133 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AES CBC routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. 
- * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - */ - -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> - -#include "aesp8-ppc.h" - -struct p8_aes_cbc_ctx { - struct crypto_skcipher *fallback; - struct aes_key enc_key; - struct aes_key dec_key; -}; - -static int p8_aes_cbc_init(struct crypto_skcipher *tfm) -{ - struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher *fallback; - - fallback = crypto_alloc_skcipher("cbc(aes)", 0, - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_ASYNC); - if (IS_ERR(fallback)) { - pr_err("Failed to allocate cbc(aes) fallback: %ld\n", - PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(fallback)); - ctx->fallback = fallback; - return 0; -} - -static void p8_aes_cbc_exit(struct crypto_skcipher *tfm) -{ - struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_skcipher(ctx->fallback); -} - -static int p8_aes_cbc_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - int ret; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - ret |= aes_p8_set_decrypt_key(key, keylen * 8, &ctx->dec_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); - - return ret ? 
-EINVAL : 0; -} - -static int p8_aes_cbc_crypt(struct skcipher_request *req, int enc) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct p8_aes_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int ret; - - if (!crypto_simd_usable()) { - struct skcipher_request *subreq = skcipher_request_ctx(req); - - *subreq = *req; - skcipher_request_set_tfm(subreq, ctx->fallback); - return enc ? crypto_skcipher_encrypt(subreq) : - crypto_skcipher_decrypt(subreq); - } - - ret = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes) != 0) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_cbc_encrypt(walk.src.virt.addr, - walk.dst.virt.addr, - round_down(nbytes, AES_BLOCK_SIZE), - enc ? &ctx->enc_key : &ctx->dec_key, - walk.iv, enc); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); - } - return ret; -} - -static int p8_aes_cbc_encrypt(struct skcipher_request *req) -{ - return p8_aes_cbc_crypt(req, 1); -} - -static int p8_aes_cbc_decrypt(struct skcipher_request *req) -{ - return p8_aes_cbc_crypt(req, 0); -} - -struct skcipher_alg p8_aes_cbc_alg = { - .base.cra_name = "cbc(aes)", - .base.cra_driver_name = "p8_aes_cbc", - .base.cra_module = THIS_MODULE, - .base.cra_priority = 2000, - .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct p8_aes_cbc_ctx), - .setkey = p8_aes_cbc_setkey, - .encrypt = p8_aes_cbc_encrypt, - .decrypt = p8_aes_cbc_decrypt, - .init = p8_aes_cbc_init, - .exit = p8_aes_cbc_exit, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, -}; diff --git a/drivers/crypto/vmx/aes_ctr.c b/drivers/crypto/vmx/aes_ctr.c deleted file mode 100644 index 9a3da8cd62..0000000000 --- a/drivers/crypto/vmx/aes_ctr.c +++ /dev/null @@ -1,149 +0,0 @@ -// SPDX-License-Identifier: 
GPL-2.0-only -/* - * AES CTR routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. - * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - */ - -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> - -#include "aesp8-ppc.h" - -struct p8_aes_ctr_ctx { - struct crypto_skcipher *fallback; - struct aes_key enc_key; -}; - -static int p8_aes_ctr_init(struct crypto_skcipher *tfm) -{ - struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher *fallback; - - fallback = crypto_alloc_skcipher("ctr(aes)", 0, - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_ASYNC); - if (IS_ERR(fallback)) { - pr_err("Failed to allocate ctr(aes) fallback: %ld\n", - PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(fallback)); - ctx->fallback = fallback; - return 0; -} - -static void p8_aes_ctr_exit(struct crypto_skcipher *tfm) -{ - struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_skcipher(ctx->fallback); -} - -static int p8_aes_ctr_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - int ret; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - ret = aes_p8_set_encrypt_key(key, keylen * 8, &ctx->enc_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); - - return ret ? 
-EINVAL : 0; -} - -static void p8_aes_ctr_final(const struct p8_aes_ctr_ctx *ctx, - struct skcipher_walk *walk) -{ - u8 *ctrblk = walk->iv; - u8 keystream[AES_BLOCK_SIZE]; - u8 *src = walk->src.virt.addr; - u8 *dst = walk->dst.virt.addr; - unsigned int nbytes = walk->nbytes; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_encrypt(ctrblk, keystream, &ctx->enc_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - crypto_xor_cpy(dst, keystream, src, nbytes); - crypto_inc(ctrblk, AES_BLOCK_SIZE); -} - -static int p8_aes_ctr_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct p8_aes_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - int ret; - - if (!crypto_simd_usable()) { - struct skcipher_request *subreq = skcipher_request_ctx(req); - - *subreq = *req; - skcipher_request_set_tfm(subreq, ctx->fallback); - return crypto_skcipher_encrypt(subreq); - } - - ret = skcipher_walk_virt(&walk, req, false); - while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - aes_p8_ctr32_encrypt_blocks(walk.src.virt.addr, - walk.dst.virt.addr, - nbytes / AES_BLOCK_SIZE, - &ctx->enc_key, walk.iv); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - do { - crypto_inc(walk.iv, AES_BLOCK_SIZE); - } while ((nbytes -= AES_BLOCK_SIZE) >= AES_BLOCK_SIZE); - - ret = skcipher_walk_done(&walk, nbytes); - } - if (nbytes) { - p8_aes_ctr_final(ctx, &walk); - ret = skcipher_walk_done(&walk, 0); - } - return ret; -} - -struct skcipher_alg p8_aes_ctr_alg = { - .base.cra_name = "ctr(aes)", - .base.cra_driver_name = "p8_aes_ctr", - .base.cra_module = THIS_MODULE, - .base.cra_priority = 2000, - .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct p8_aes_ctr_ctx), - .setkey = p8_aes_ctr_setkey, - .encrypt = 
p8_aes_ctr_crypt, - .decrypt = p8_aes_ctr_crypt, - .init = p8_aes_ctr_init, - .exit = p8_aes_ctr_exit, - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, -}; diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c deleted file mode 100644 index dabbccb415..0000000000 --- a/drivers/crypto/vmx/aes_xts.c +++ /dev/null @@ -1,162 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * AES XTS routines supporting VMX In-core instructions on Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. - * - * Author: Leonidas S. Barbosa <leosilva@linux.vnet.ibm.com> - */ - -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/skcipher.h> -#include <crypto/xts.h> - -#include "aesp8-ppc.h" - -struct p8_aes_xts_ctx { - struct crypto_skcipher *fallback; - struct aes_key enc_key; - struct aes_key dec_key; - struct aes_key tweak_key; -}; - -static int p8_aes_xts_init(struct crypto_skcipher *tfm) -{ - struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_skcipher *fallback; - - fallback = crypto_alloc_skcipher("xts(aes)", 0, - CRYPTO_ALG_NEED_FALLBACK | - CRYPTO_ALG_ASYNC); - if (IS_ERR(fallback)) { - pr_err("Failed to allocate xts(aes) fallback: %ld\n", - PTR_ERR(fallback)); - return PTR_ERR(fallback); - } - - crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + - crypto_skcipher_reqsize(fallback)); - ctx->fallback = fallback; - return 0; -} - -static void p8_aes_xts_exit(struct crypto_skcipher *tfm) -{ - struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - - crypto_free_skcipher(ctx->fallback); -} - -static int p8_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - int ret; - - ret = xts_verify_key(tfm, key, keylen); - if (ret) - return ret; - - preempt_disable(); 
- pagefault_disable(); - enable_kernel_vsx(); - ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key); - ret |= aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key); - ret |= aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret |= crypto_skcipher_setkey(ctx->fallback, key, keylen); - - return ret ? -EINVAL : 0; -} - -static int p8_aes_xts_crypt(struct skcipher_request *req, int enc) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct p8_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - unsigned int nbytes; - u8 tweak[AES_BLOCK_SIZE]; - int ret; - - if (req->cryptlen < AES_BLOCK_SIZE) - return -EINVAL; - - if (!crypto_simd_usable() || (req->cryptlen % XTS_BLOCK_SIZE) != 0) { - struct skcipher_request *subreq = skcipher_request_ctx(req); - - *subreq = *req; - skcipher_request_set_tfm(subreq, ctx->fallback); - return enc ? crypto_skcipher_encrypt(subreq) : - crypto_skcipher_decrypt(subreq); - } - - ret = skcipher_walk_virt(&walk, req, false); - if (ret) - return ret; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - - aes_p8_encrypt(walk.iv, tweak, &ctx->tweak_key); - - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - while ((nbytes = walk.nbytes) != 0) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - if (enc) - aes_p8_xts_encrypt(walk.src.virt.addr, - walk.dst.virt.addr, - round_down(nbytes, AES_BLOCK_SIZE), - &ctx->enc_key, NULL, tweak); - else - aes_p8_xts_decrypt(walk.src.virt.addr, - walk.dst.virt.addr, - round_down(nbytes, AES_BLOCK_SIZE), - &ctx->dec_key, NULL, tweak); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - ret = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE); - } - return ret; -} - -static int p8_aes_xts_encrypt(struct skcipher_request *req) -{ - return p8_aes_xts_crypt(req, 1); -} - -static int 
p8_aes_xts_decrypt(struct skcipher_request *req) -{ - return p8_aes_xts_crypt(req, 0); -} - -struct skcipher_alg p8_aes_xts_alg = { - .base.cra_name = "xts(aes)", - .base.cra_driver_name = "p8_aes_xts", - .base.cra_module = THIS_MODULE, - .base.cra_priority = 2000, - .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct p8_aes_xts_ctx), - .setkey = p8_aes_xts_setkey, - .encrypt = p8_aes_xts_encrypt, - .decrypt = p8_aes_xts_decrypt, - .init = p8_aes_xts_init, - .exit = p8_aes_xts_exit, - .min_keysize = 2 * AES_MIN_KEY_SIZE, - .max_keysize = 2 * AES_MAX_KEY_SIZE, - .ivsize = AES_BLOCK_SIZE, -}; diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h deleted file mode 100644 index 5764d44383..0000000000 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/types.h> -#include <crypto/aes.h> - -struct aes_key { - u8 key[AES_MAX_KEYLENGTH]; - int rounds; -}; - -extern struct shash_alg p8_ghash_alg; -extern struct crypto_alg p8_aes_alg; -extern struct skcipher_alg p8_aes_cbc_alg; -extern struct skcipher_alg p8_aes_ctr_alg; -extern struct skcipher_alg p8_aes_xts_alg; - -int aes_p8_set_encrypt_key(const u8 *userKey, const int bits, - struct aes_key *key); -int aes_p8_set_decrypt_key(const u8 *userKey, const int bits, - struct aes_key *key); -void aes_p8_encrypt(const u8 *in, u8 *out, const struct aes_key *key); -void aes_p8_decrypt(const u8 *in, u8 *out, const struct aes_key *key); -void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, - const struct aes_key *key, u8 *iv, const int enc); -void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, - size_t len, const struct aes_key *key, - const u8 *iv); -void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, - const struct aes_key *key1, const struct aes_key *key2, u8 *iv); -void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, - const struct aes_key *key1, 
const struct aes_key *key2, u8 *iv); diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl deleted file mode 100644 index f729589d79..0000000000 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ /dev/null @@ -1,3889 +0,0 @@ -#! /usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 - -# This code is taken from CRYPTOGAMs[1] and is included here using the option -# in the license to distribute the code under the GPL. Therefore this program -# is free software; you can redistribute it and/or modify it under the terms of -# the GNU General Public License version 2 as published by the Free Software -# Foundation. -# -# [1] https://www.openssl.org/~appro/cryptogams/ - -# Copyright (c) 2006-2017, CRYPTOGAMS by <appro@openssl.org> -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain copyright notices, -# this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials -# provided with the distribution. -# -# * Neither the name of the CRYPTOGAMS nor the names of its -# copyright holder and contributors may be used to endorse or -# promote products derived from this software without specific -# prior written permission. -# -# ALTERNATIVELY, provided that this notice is retained in full, this -# product may be distributed under the terms of the GNU General Public -# License (GPL), in which case the provisions of the GPL apply INSTEAD OF -# those given above. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see https://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# This module implements support for AES instructions as per PowerISA -# specification version 2.07, first implemented by POWER8 processor. -# The module is endian-agnostic in sense that it supports both big- -# and little-endian cases. Data alignment in parallelizable modes is -# handled with VSX loads and stores, which implies MSR.VSX flag being -# set. It should also be noted that ISA specification doesn't prohibit -# alignment exceptions for these instructions on page boundaries. -# Initially alignment was handled in pure AltiVec/VMX way [when data -# is aligned programmatically, which in turn guarantees exception- -# free execution], but it turned to hamper performance when vcipher -# instructions are interleaved. It's reckoned that eventual -# misalignment penalties at page boundaries are in average lower -# than additional overhead in pure AltiVec approach. -# -# May 2016 -# -# Add XTS subroutine, 9x on little- and 12x improvement on big-endian -# systems were measured. 
-# -###################################################################### -# Current large-block performance in cycles per byte processed with -# 128-bit key (less is better). -# -# CBC en-/decrypt CTR XTS -# POWER8[le] 3.96/0.72 0.74 1.1 -# POWER8[be] 3.75/0.65 0.66 1.0 - -$flavour = shift; - -if ($flavour =~ /64/) { - $SIZE_T =8; - $LRSAVE =2*$SIZE_T; - $STU ="stdu"; - $POP ="ld"; - $PUSH ="std"; - $UCMP ="cmpld"; - $SHL ="sldi"; -} elsif ($flavour =~ /32/) { - $SIZE_T =4; - $LRSAVE =$SIZE_T; - $STU ="stwu"; - $POP ="lwz"; - $PUSH ="stw"; - $UCMP ="cmplw"; - $SHL ="slwi"; -} else { die "nonsense $flavour"; } - -$LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0; - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!"; - -$FRAME=8*$SIZE_T; -$prefix="aes_p8"; - -$sp="r1"; -$vrsave="r12"; - -######################################################################### -{{{ # Key setup procedures # -my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8)); -my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6)); -my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11)); - -$code.=<<___; -.machine "any" - -.text - -.align 7 -rcon: -.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev -.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev -.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev -.long 0,0,0,0 ?asis -.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe -Lconsts: - mflr r0 - bcl 20,31,\$+4 - mflr $ptr #vvvvv "distance between . 
and rcon - addi $ptr,$ptr,-0x58 - mtlr r0 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 -.asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" - -.globl .${prefix}_set_encrypt_key -Lset_encrypt_key: - mflr r11 - $PUSH r11,$LRSAVE($sp) - - li $ptr,-1 - ${UCMP}i $inp,0 - beq- Lenc_key_abort # if ($inp==0) return -1; - ${UCMP}i $out,0 - beq- Lenc_key_abort # if ($out==0) return -1; - li $ptr,-2 - cmpwi $bits,128 - blt- Lenc_key_abort - cmpwi $bits,256 - bgt- Lenc_key_abort - andi. r0,$bits,0x3f - bne- Lenc_key_abort - - lis r0,0xfff0 - mfspr $vrsave,256 - mtspr 256,r0 - - bl Lconsts - mtlr r11 - - neg r9,$inp - lvx $in0,0,$inp - addi $inp,$inp,15 # 15 is not typo - lvsr $key,0,r9 # borrow $key - li r8,0x20 - cmpwi $bits,192 - lvx $in1,0,$inp - le?vspltisb $mask,0x0f # borrow $mask - lvx $rcon,0,$ptr - le?vxor $key,$key,$mask # adjust for byte swap - lvx $mask,r8,$ptr - addi $ptr,$ptr,0x10 - vperm $in0,$in0,$in1,$key # align [and byte swap in LE] - li $cnt,8 - vxor $zero,$zero,$zero - mtctr $cnt - - ?lvsr $outperm,0,$out - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$zero,$outmask,$outperm - - blt Loop128 - addi $inp,$inp,8 - beq L192 - addi $inp,$inp,8 - b L256 - -.align 4 -Loop128: - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - bdnz Loop128 - - lvx $rcon,0,$ptr # last two round keys - - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 
- - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - - vperm $key,$in0,$in0,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vxor $in0,$in0,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - - addi $inp,$out,15 # 15 is not typo - addi $out,$out,0x50 - - li $rounds,10 - b Ldone - -.align 4 -L192: - lvx $tmp,0,$inp - li $cnt,4 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $out,$out,16 - vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] - vspltisb $key,8 # borrow $key - mtctr $cnt - vsububm $mask,$mask,$key # adjust the mask - -Loop192: - vperm $key,$in1,$in1,$mask # roate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vcipherlast $key,$key,$rcon - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - - vsldoi $stage,$zero,$in1,8 - vspltw $tmp,$in0,3 - vxor $tmp,$tmp,$in1 - vsldoi $in1,$zero,$in1,12 # >>32 - vadduwm $rcon,$rcon,$rcon - vxor $in1,$in1,$tmp - vxor $in0,$in0,$key - vxor $in1,$in1,$key - vsldoi $stage,$stage,$in0,8 - - vperm $key,$in1,$in1,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$stage,$stage,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vsldoi $stage,$in0,$in1,8 - vxor $in0,$in0,$tmp - 
vsldoi $tmp,$zero,$tmp,12 # >>32 - vperm $outtail,$stage,$stage,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - stvx $stage,0,$out - addi $out,$out,16 - - vspltw $tmp,$in0,3 - vxor $tmp,$tmp,$in1 - vsldoi $in1,$zero,$in1,12 # >>32 - vadduwm $rcon,$rcon,$rcon - vxor $in1,$in1,$tmp - vxor $in0,$in0,$key - vxor $in1,$in1,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $inp,$out,15 # 15 is not typo - addi $out,$out,16 - bdnz Loop192 - - li $rounds,12 - addi $out,$out,0x20 - b Ldone - -.align 4 -L256: - lvx $tmp,0,$inp - li $cnt,7 - li $rounds,14 - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $out,$out,16 - vperm $in1,$in1,$tmp,$key # align [and byte swap in LE] - mtctr $cnt - -Loop256: - vperm $key,$in1,$in1,$mask # rotate-n-splat - vsldoi $tmp,$zero,$in0,12 # >>32 - vperm $outtail,$in1,$in1,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - vcipherlast $key,$key,$rcon - stvx $stage,0,$out - addi $out,$out,16 - - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in0,$in0,$tmp - vadduwm $rcon,$rcon,$rcon - vxor $in0,$in0,$key - vperm $outtail,$in0,$in0,$outperm # rotate - vsel $stage,$outhead,$outtail,$outmask - vmr $outhead,$outtail - stvx $stage,0,$out - addi $inp,$out,15 # 15 is not typo - addi $out,$out,16 - bdz Ldone - - vspltw $key,$in0,3 # just splat - vsldoi $tmp,$zero,$in1,12 # >>32 - vsbox $key,$key - - vxor $in1,$in1,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in1,$in1,$tmp - vsldoi $tmp,$zero,$tmp,12 # >>32 - vxor $in1,$in1,$tmp - - vxor $in1,$in1,$key - b Loop256 - -.align 4 -Ldone: - lvx $in1,0,$inp # redundant in aligned case - vsel 
$in1,$outhead,$in1,$outmask - stvx $in1,0,$inp - li $ptr,0 - mtspr 256,$vrsave - stw $rounds,0($out) - -Lenc_key_abort: - mr r3,$ptr - blr - .long 0 - .byte 0,12,0x14,1,0,0,3,0 - .long 0 -.size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key - -.globl .${prefix}_set_decrypt_key - $STU $sp,-$FRAME($sp) - mflr r10 - $PUSH r10,$FRAME+$LRSAVE($sp) - bl Lset_encrypt_key - mtlr r10 - - cmpwi r3,0 - bne- Ldec_key_abort - - slwi $cnt,$rounds,4 - subi $inp,$out,240 # first round key - srwi $rounds,$rounds,1 - add $out,$inp,$cnt # last round key - mtctr $rounds - -Ldeckey: - lwz r0, 0($inp) - lwz r6, 4($inp) - lwz r7, 8($inp) - lwz r8, 12($inp) - addi $inp,$inp,16 - lwz r9, 0($out) - lwz r10,4($out) - lwz r11,8($out) - lwz r12,12($out) - stw r0, 0($out) - stw r6, 4($out) - stw r7, 8($out) - stw r8, 12($out) - subi $out,$out,16 - stw r9, -16($inp) - stw r10,-12($inp) - stw r11,-8($inp) - stw r12,-4($inp) - bdnz Ldeckey - - xor r3,r3,r3 # return value -Ldec_key_abort: - addi $sp,$sp,$FRAME - blr - .long 0 - .byte 0,12,4,1,0x80,0,3,0 - .long 0 -.size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key -___ -}}} -######################################################################### -{{{ # Single block en- and decrypt procedures # -sub gen_block () { -my $dir = shift; -my $n = $dir eq "de" ? 
"n" : ""; -my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7)); - -$code.=<<___; -.globl .${prefix}_${dir}crypt - lwz $rounds,240($key) - lis r0,0xfc00 - mfspr $vrsave,256 - li $idx,15 # 15 is not typo - mtspr 256,r0 - - lvx v0,0,$inp - neg r11,$out - lvx v1,$idx,$inp - lvsl v2,0,$inp # inpperm - le?vspltisb v4,0x0f - ?lvsl v3,0,r11 # outperm - le?vxor v2,v2,v4 - li $idx,16 - vperm v0,v0,v1,v2 # align [and byte swap in LE] - lvx v1,0,$key - ?lvsl v5,0,$key # keyperm - srwi $rounds,$rounds,1 - lvx v2,$idx,$key - addi $idx,$idx,16 - subi $rounds,$rounds,1 - ?vperm v1,v1,v2,v5 # align round key - - vxor v0,v0,v1 - lvx v1,$idx,$key - addi $idx,$idx,16 - mtctr $rounds - -Loop_${dir}c: - ?vperm v2,v2,v1,v5 - v${n}cipher v0,v0,v2 - lvx v2,$idx,$key - addi $idx,$idx,16 - ?vperm v1,v1,v2,v5 - v${n}cipher v0,v0,v1 - lvx v1,$idx,$key - addi $idx,$idx,16 - bdnz Loop_${dir}c - - ?vperm v2,v2,v1,v5 - v${n}cipher v0,v0,v2 - lvx v2,$idx,$key - ?vperm v1,v1,v2,v5 - v${n}cipherlast v0,v0,v1 - - vspltisb v2,-1 - vxor v1,v1,v1 - li $idx,15 # 15 is not typo - ?vperm v2,v1,v2,v3 # outmask - le?vxor v3,v3,v4 - lvx v1,0,$out # outhead - vperm v0,v0,v0,v3 # rotate [and byte swap in LE] - vsel v1,v1,v0,v2 - lvx v4,$idx,$out - stvx v1,0,$out - vsel v0,v0,v4,v2 - stvx v0,$idx,$out - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,3,0 - .long 0 -.size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt -___ -} -&gen_block("en"); -&gen_block("de"); -}}} -######################################################################### -{{{ # CBC en- and decrypt procedures # -my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); -my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)= - map("v$_",(4..10)); -$code.=<<___; -.globl .${prefix}_cbc_encrypt - ${UCMP}i $len,16 - bltlr- - - cmpwi $enc,0 # test direction - lis r0,0xffe0 - mfspr $vrsave,256 - mtspr 256,r0 - - li $idx,15 - vxor $rndkey0,$rndkey0,$rndkey0 - 
le?vspltisb $tmp,0x0f - - lvx $ivec,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $ivec,$ivec,$inptail,$inpperm - - neg r11,$inp - ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) - - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inptail,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ?lvsr $outperm,0,$out # prepare for unaligned store - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - - srwi $rounds,$rounds,1 - li $idx,16 - subi $rounds,$rounds,1 - beq Lcbc_dec - -Lcbc_enc: - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - mtctr $rounds - subi $len,$len,16 # len-=16 - - lvx $rndkey0,0,$key - vperm $inout,$inout,$inptail,$inpperm - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - vxor $inout,$inout,$ivec - -Loop_cbc_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_cbc_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $ivec,$inout,$rndkey0 - ${UCMP}i $len,16 - - vperm $tmp,$ivec,$ivec,$outperm - vsel $inout,$outhead,$tmp,$outmask - vmr $outhead,$tmp - stvx $inout,0,$out - addi $out,$out,16 - bge Lcbc_enc - - b Lcbc_done - -.align 4 -Lcbc_dec: - ${UCMP}i $len,128 - bge _aesp8_cbc_decrypt8x - vmr $tmp,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - mtctr $rounds - subi $len,$len,16 # len-=16 - - lvx $rndkey0,0,$key - vperm $tmp,$tmp,$inptail,$inpperm - lvx $rndkey1,$idx,$key - addi 
$idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$tmp,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - -Loop_cbc_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_cbc_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipherlast $inout,$inout,$rndkey0 - ${UCMP}i $len,16 - - vxor $inout,$inout,$ivec - vmr $ivec,$tmp - vperm $tmp,$inout,$inout,$outperm - vsel $inout,$outhead,$tmp,$outmask - vmr $outhead,$tmp - stvx $inout,0,$out - addi $out,$out,16 - bge Lcbc_dec - -Lcbc_done: - addi $out,$out,-1 - lvx $inout,0,$out # redundant in aligned case - vsel $inout,$outhead,$inout,$outmask - stvx $inout,0,$out - - neg $enc,$ivp # write [unaligned] iv - li $idx,15 # 15 is not typo - vxor $rndkey0,$rndkey0,$rndkey0 - vspltisb $outmask,-1 - le?vspltisb $tmp,0x0f - ?lvsl $outperm,0,$enc - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - lvx $outhead,0,$ivp - vperm $ivec,$ivec,$ivec,$outperm - vsel $inout,$outhead,$ivec,$outmask - lvx $inptail,$idx,$ivp - stvx $inout,0,$ivp - vsel $inout,$ivec,$inptail,$outmask - stvx $inout,$idx,$ivp - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,6,0 - .long 0 -___ -######################################################################### -{{ # Optimized CBC decrypt procedure # -my $key_="r11"; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); -my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13)); -my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($tmp,$keyperm)=($in3,$in4); 
# aliases with "caller", redundant assignment - -$code.=<<___; -.align 5 -_aesp8_cbc_decrypt8x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - li r10,`$FRAME+8*16+15` - li r11,`$FRAME+8*16+31` - stvx v20,r10,$sp # ABI says so - addi r10,r10,32 - stvx v21,r11,$sp - addi r11,r11,32 - stvx v22,r10,$sp - addi r10,r10,32 - stvx v23,r11,$sp - addi r11,r11,32 - stvx v24,r10,$sp - addi r10,r10,32 - stvx v25,r11,$sp - addi r11,r11,32 - stvx v26,r10,$sp - addi r10,r10,32 - stvx v27,r11,$sp - addi r11,r11,32 - stvx v28,r10,$sp - addi r10,r10,32 - stvx v29,r11,$sp - addi r11,r11,32 - stvx v30,r10,$sp - stvx v31,r11,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - subi $rounds,$rounds,3 # -4 in total - subi $len,$len,128 # bias - - lvx $rndkey0,$x00,$key # load key schedule - lvx v30,$x10,$key - addi $key,$key,0x20 - lvx v31,$x00,$key - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,$FRAME+15 - mtctr $rounds - -Load_cbc_dec_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key - addi $key,$key,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_cbc_dec_key - - lvx v26,$x10,$key - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,$FRAME+15 # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key - ?vperm 
v29,v29,v30,$keyperm - lvx $out0,$x70,$key # borrow $out0 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$out0,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - #lvx $inptail,0,$inp # "caller" already did this - #addi $inp,$inp,15 # 15 is not typo - subi $inp,$inp,15 # undo "caller" - - le?li $idx,8 - lvx_u $in0,$x00,$inp # load first 8 "words" - le?lvsl $inpperm,0,$idx - le?vspltisb $tmp,0x0f - lvx_u $in1,$x10,$inp - le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u - lvx_u $in2,$x20,$inp - le?vperm $in0,$in0,$in0,$inpperm - lvx_u $in3,$x30,$inp - le?vperm $in1,$in1,$in1,$inpperm - lvx_u $in4,$x40,$inp - le?vperm $in2,$in2,$in2,$inpperm - vxor $out0,$in0,$rndkey0 - lvx_u $in5,$x50,$inp - le?vperm $in3,$in3,$in3,$inpperm - vxor $out1,$in1,$rndkey0 - lvx_u $in6,$x60,$inp - le?vperm $in4,$in4,$in4,$inpperm - vxor $out2,$in2,$rndkey0 - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - le?vperm $in5,$in5,$in5,$inpperm - vxor $out3,$in3,$rndkey0 - le?vperm $in6,$in6,$in6,$inpperm - vxor $out4,$in4,$rndkey0 - le?vperm $in7,$in7,$in7,$inpperm - vxor $out5,$in5,$rndkey0 - vxor $out6,$in6,$rndkey0 - vxor $out7,$in7,$rndkey0 - - mtctr $rounds - b Loop_cbc_dec8x -.align 5 -Loop_cbc_dec8x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_cbc_dec8x - - subic $len,$len,128 # $len-=128 - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher 
$out6,$out6,v24 - vncipher $out7,$out7,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - - and r0,r0,$len - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - vncipher $out6,$out6,v26 - vncipher $out7,$out7,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in7 are loaded - # with last "words" - vncipher $out0,$out0,v27 - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - vncipher $out6,$out6,v27 - vncipher $out7,$out7,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - vncipher $out6,$out6,v28 - vncipher $out7,$out7,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - vncipher $out6,$out6,v29 - vncipher $out7,$out7,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - - vncipher $out0,$out0,v30 - vxor $ivec,$ivec,v31 # xor with last round key - vncipher $out1,$out1,v30 - vxor $in0,$in0,v31 - vncipher $out2,$out2,v30 - vxor $in1,$in1,v31 - vncipher $out3,$out3,v30 - vxor $in2,$in2,v31 - vncipher $out4,$out4,v30 - vxor $in3,$in3,v31 - vncipher $out5,$out5,v30 - vxor $in4,$in4,v31 - vncipher $out6,$out6,v30 - vxor $in5,$in5,v31 - vncipher $out7,$out7,v30 - vxor $in6,$in6,v31 - - vncipherlast $out0,$out0,$ivec - vncipherlast $out1,$out1,$in0 - lvx_u $in0,$x00,$inp # load next input block - vncipherlast $out2,$out2,$in1 - 
lvx_u $in1,$x10,$inp - vncipherlast $out3,$out3,$in2 - le?vperm $in0,$in0,$in0,$inpperm - lvx_u $in2,$x20,$inp - vncipherlast $out4,$out4,$in3 - le?vperm $in1,$in1,$in1,$inpperm - lvx_u $in3,$x30,$inp - vncipherlast $out5,$out5,$in4 - le?vperm $in2,$in2,$in2,$inpperm - lvx_u $in4,$x40,$inp - vncipherlast $out6,$out6,$in5 - le?vperm $in3,$in3,$in3,$inpperm - lvx_u $in5,$x50,$inp - vncipherlast $out7,$out7,$in6 - le?vperm $in4,$in4,$in4,$inpperm - lvx_u $in6,$x60,$inp - vmr $ivec,$in7 - le?vperm $in5,$in5,$in5,$inpperm - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $in6,$in6,$in6,$inpperm - vxor $out0,$in0,$rndkey0 - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $in7,$in7,$in7,$inpperm - vxor $out1,$in1,$rndkey0 - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$rndkey0 - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$rndkey0 - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$rndkey0 - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - vxor $out5,$in5,$rndkey0 - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x60,$out - vxor $out6,$in6,$rndkey0 - stvx_u $out7,$x70,$out - addi $out,$out,0x80 - vxor $out7,$in7,$rndkey0 - - mtctr $rounds - beq Loop_cbc_dec8x # did $len-=128 borrow? - - addic. $len,$len,128 - beq Lcbc_dec8x_done - nop - nop - -Loop_cbc_dec8x_tail: # up to 7 "words" tail... 
- vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_cbc_dec8x_tail - - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - vncipher $out6,$out6,v24 - vncipher $out7,$out7,v24 - - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - vncipher $out6,$out6,v25 - vncipher $out7,$out7,v25 - - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - vncipher $out6,$out6,v26 - vncipher $out7,$out7,v26 - - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - vncipher $out6,$out6,v27 - vncipher $out7,$out7,v27 - - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - vncipher $out6,$out6,v28 - vncipher $out7,$out7,v28 - - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - vncipher $out6,$out6,v29 - vncipher $out7,$out7,v29 - - vncipher $out1,$out1,v30 - vxor $ivec,$ivec,v31 # last round key - vncipher $out2,$out2,v30 - vxor $in1,$in1,v31 - vncipher $out3,$out3,v30 - vxor $in2,$in2,v31 - vncipher $out4,$out4,v30 - vxor $in3,$in3,v31 - vncipher $out5,$out5,v30 - vxor $in4,$in4,v31 - vncipher $out6,$out6,v30 - vxor $in5,$in5,v31 - vncipher $out7,$out7,v30 - vxor $in6,$in6,v31 - 
- cmplwi $len,32 # switch($len) - blt Lcbc_dec8x_one - nop - beq Lcbc_dec8x_two - cmplwi $len,64 - blt Lcbc_dec8x_three - nop - beq Lcbc_dec8x_four - cmplwi $len,96 - blt Lcbc_dec8x_five - nop - beq Lcbc_dec8x_six - -Lcbc_dec8x_seven: - vncipherlast $out1,$out1,$ivec - vncipherlast $out2,$out2,$in1 - vncipherlast $out3,$out3,$in2 - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out1,$out1,$out1,$inpperm - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x00,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x10,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x20,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x30,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x40,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x50,$out - stvx_u $out7,$x60,$out - addi $out,$out,0x70 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_six: - vncipherlast $out2,$out2,$ivec - vncipherlast $out3,$out3,$in2 - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out2,$out2,$out2,$inpperm - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x00,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x10,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x20,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x30,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x40,$out - stvx_u $out7,$x50,$out - addi $out,$out,0x60 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_five: - vncipherlast $out3,$out3,$ivec - vncipherlast $out4,$out4,$in3 - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out3,$out3,$out3,$inpperm - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x00,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x10,$out - 
le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x20,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x30,$out - stvx_u $out7,$x40,$out - addi $out,$out,0x50 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_four: - vncipherlast $out4,$out4,$ivec - vncipherlast $out5,$out5,$in4 - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out4,$out4,$out4,$inpperm - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x00,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x10,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x20,$out - stvx_u $out7,$x30,$out - addi $out,$out,0x40 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_three: - vncipherlast $out5,$out5,$ivec - vncipherlast $out6,$out6,$in5 - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out5,$out5,$out5,$inpperm - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x00,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x10,$out - stvx_u $out7,$x20,$out - addi $out,$out,0x30 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_two: - vncipherlast $out6,$out6,$ivec - vncipherlast $out7,$out7,$in6 - vmr $ivec,$in7 - - le?vperm $out6,$out6,$out6,$inpperm - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x00,$out - stvx_u $out7,$x10,$out - addi $out,$out,0x20 - b Lcbc_dec8x_done - -.align 5 -Lcbc_dec8x_one: - vncipherlast $out7,$out7,$ivec - vmr $ivec,$in7 - - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out7,0,$out - addi $out,$out,0x10 - -Lcbc_dec8x_done: - le?vperm $ivec,$ivec,$ivec,$inpperm - stvx_u $ivec,0,$ivp # write [unaligned] iv - - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $inpperm,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - - 
mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x14,0,0x80,6,6,0 - .long 0 -.size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt -___ -}} }}} - -######################################################################### -{{{ # CTR procedure[s] # - -####################### WARNING: Here be dragons! ####################### -# -# This code is written as 'ctr32', based on a 32-bit counter used -# upstream. The kernel does *not* use a 32-bit counter. The kernel uses -# a 128-bit counter. -# -# This leads to subtle changes from the upstream code: the counter -# is incremented with vaddu_q_m rather than vaddu_w_m. This occurs in -# both the bulk (8 blocks at a time) path, and in the individual block -# path. Be aware of this when doing updates. 
-# -# See: -# 1d4aa0b4c181 ("crypto: vmx - Fixing AES-CTR counter bug") -# 009b30ac7444 ("crypto: vmx - CTR: always increment IV as quadword") -# https://github.com/openssl/openssl/pull/8942 -# -######################################################################### -my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3)); -my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)= - map("v$_",(4..11)); -my $dat=$tmp; - -$code.=<<___; -.globl .${prefix}_ctr32_encrypt_blocks - ${UCMP}i $len,1 - bltlr- - - lis r0,0xfff0 - mfspr $vrsave,256 - mtspr 256,r0 - - li $idx,15 - vxor $rndkey0,$rndkey0,$rndkey0 - le?vspltisb $tmp,0x0f - - lvx $ivec,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - vspltisb $one,1 - le?vxor $inpperm,$inpperm,$tmp - vperm $ivec,$ivec,$inptail,$inpperm - vsldoi $one,$rndkey0,$one,1 - - neg r11,$inp - ?lvsl $keyperm,0,$key # prepare for unaligned key - lwz $rounds,240($key) - - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inptail,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - srwi $rounds,$rounds,1 - li $idx,16 - subi $rounds,$rounds,1 - - ${UCMP}i $len,8 - bge _aesp8_ctr32_encrypt8x - - ?lvsr $outperm,0,$out # prepare for unaligned store - vspltisb $outmask,-1 - lvx $outhead,0,$out - ?vperm $outmask,$rndkey0,$outmask,$outperm - le?vxor $outperm,$outperm,$tmp - - lvx $rndkey0,0,$key - mtctr $rounds - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$ivec,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - b Loop_ctr32_enc - -.align 5 -Loop_ctr32_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - bdnz Loop_ctr32_enc - - vadduqm 
$ivec,$ivec,$one # Kernel change for 128-bit - vmr $dat,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - subic. $len,$len,1 # blocks-- - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key - vperm $dat,$dat,$inptail,$inpperm - li $idx,16 - ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm - lvx $rndkey0,0,$key - vxor $dat,$dat,$rndkey1 # last round key - vcipherlast $inout,$inout,$dat - - lvx $rndkey1,$idx,$key - addi $idx,$idx,16 - vperm $inout,$inout,$inout,$outperm - vsel $dat,$outhead,$inout,$outmask - mtctr $rounds - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vmr $outhead,$inout - vxor $inout,$ivec,$rndkey0 - lvx $rndkey0,$idx,$key - addi $idx,$idx,16 - stvx $dat,0,$out - addi $out,$out,16 - bne Loop_ctr32_enc - - addi $out,$out,-1 - lvx $inout,0,$out # redundant in aligned case - vsel $inout,$outhead,$inout,$outmask - stvx $inout,0,$out - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,6,0 - .long 0 -___ -######################################################################### -{{ # Optimized CTR procedure # -my $key_="r11"; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31)); -my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14)); -my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment -my ($two,$three,$four)=($outhead,$outperm,$outmask); - -$code.=<<___; -.align 5 -_aesp8_ctr32_encrypt8x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - li r10,`$FRAME+8*16+15` - li r11,`$FRAME+8*16+31` - stvx v20,r10,$sp # ABI says so - addi r10,r10,32 - stvx v21,r11,$sp - addi r11,r11,32 - stvx v22,r10,$sp - addi r10,r10,32 - stvx v23,r11,$sp - addi r11,r11,32 - stvx v24,r10,$sp - addi r10,r10,32 - stvx v25,r11,$sp - addi r11,r11,32 - stvx v26,r10,$sp - addi r10,r10,32 - stvx 
v27,r11,$sp - addi r11,r11,32 - stvx v28,r10,$sp - addi r10,r10,32 - stvx v29,r11,$sp - addi r11,r11,32 - stvx v30,r10,$sp - stvx v31,r11,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key # load key schedule - lvx v30,$x10,$key - addi $key,$key,0x20 - lvx v31,$x00,$key - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,$FRAME+15 - mtctr $rounds - -Load_ctr32_enc_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key - addi $key,$key,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_ctr32_enc_key - - lvx v26,$x10,$key - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,$FRAME+15 # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key - ?vperm v29,v29,v30,$keyperm - lvx $out0,$x70,$key # borrow $out0 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$out0,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - vadduqm $two,$one,$one - subi $inp,$inp,15 # undo "caller" - $SHL $len,$len,4 - - vadduqm $out1,$ivec,$one # counter values ... - vadduqm $out2,$ivec,$two # (do all ctr adds as 128-bit) - vxor $out0,$ivec,$rndkey0 # ... 
xored with rndkey[0] - le?li $idx,8 - vadduqm $out3,$out1,$two - vxor $out1,$out1,$rndkey0 - le?lvsl $inpperm,0,$idx - vadduqm $out4,$out2,$two - vxor $out2,$out2,$rndkey0 - le?vspltisb $tmp,0x0f - vadduqm $out5,$out3,$two - vxor $out3,$out3,$rndkey0 - le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u - vadduqm $out6,$out4,$two - vxor $out4,$out4,$rndkey0 - vadduqm $out7,$out5,$two - vxor $out5,$out5,$rndkey0 - vadduqm $ivec,$out6,$two # next counter value - vxor $out6,$out6,$rndkey0 - vxor $out7,$out7,$rndkey0 - - mtctr $rounds - b Loop_ctr32_enc8x -.align 5 -Loop_ctr32_enc8x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - vcipher $out6,$out6,v24 - vcipher $out7,$out7,v24 -Loop_ctr32_enc8x_middle: - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - vcipher $out6,$out6,v25 - vcipher $out7,$out7,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_ctr32_enc8x - - subic r11,$len,256 # $len-256, borrow $key_ - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - vcipher $out6,$out6,v24 - vcipher $out7,$out7,v24 - - subfe r0,r0,r0 # borrow?-1:0 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - vcipher $out6,$out6,v25 - vcipher $out7,$out7,v25 - - and r0,r0,r11 - addi $key_,$sp,$FRAME+15 # rewind $key_ - vcipher $out0,$out0,v26 - vcipher $out1,$out1,v26 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - vcipher $out4,$out4,v26 - vcipher $out5,$out5,v26 - vcipher $out6,$out6,v26 - vcipher $out7,$out7,v26 - lvx v24,$x00,$key_ # re-pre-load round[1] - - subic $len,$len,129 # $len-=129 - 
vcipher $out0,$out0,v27 - addi $len,$len,1 # $len-=128 really - vcipher $out1,$out1,v27 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vcipher $out4,$out4,v27 - vcipher $out5,$out5,v27 - vcipher $out6,$out6,v27 - vcipher $out7,$out7,v27 - lvx v25,$x10,$key_ # re-pre-load round[2] - - vcipher $out0,$out0,v28 - lvx_u $in0,$x00,$inp # load input - vcipher $out1,$out1,v28 - lvx_u $in1,$x10,$inp - vcipher $out2,$out2,v28 - lvx_u $in2,$x20,$inp - vcipher $out3,$out3,v28 - lvx_u $in3,$x30,$inp - vcipher $out4,$out4,v28 - lvx_u $in4,$x40,$inp - vcipher $out5,$out5,v28 - lvx_u $in5,$x50,$inp - vcipher $out6,$out6,v28 - lvx_u $in6,$x60,$inp - vcipher $out7,$out7,v28 - lvx_u $in7,$x70,$inp - addi $inp,$inp,0x80 - - vcipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$inpperm - vcipher $out1,$out1,v29 - le?vperm $in1,$in1,$in1,$inpperm - vcipher $out2,$out2,v29 - le?vperm $in2,$in2,$in2,$inpperm - vcipher $out3,$out3,v29 - le?vperm $in3,$in3,$in3,$inpperm - vcipher $out4,$out4,v29 - le?vperm $in4,$in4,$in4,$inpperm - vcipher $out5,$out5,v29 - le?vperm $in5,$in5,$in5,$inpperm - vcipher $out6,$out6,v29 - le?vperm $in6,$in6,$in6,$inpperm - vcipher $out7,$out7,v29 - le?vperm $in7,$in7,$in7,$inpperm - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in7 are loaded - # with last "words" - subfe. r0,r0,r0 # borrow?-1:0 - vcipher $out0,$out0,v30 - vxor $in0,$in0,v31 # xor with last round key - vcipher $out1,$out1,v30 - vxor $in1,$in1,v31 - vcipher $out2,$out2,v30 - vxor $in2,$in2,v31 - vcipher $out3,$out3,v30 - vxor $in3,$in3,v31 - vcipher $out4,$out4,v30 - vxor $in4,$in4,v31 - vcipher $out5,$out5,v30 - vxor $in5,$in5,v31 - vcipher $out6,$out6,v30 - vxor $in6,$in6,v31 - vcipher $out7,$out7,v30 - vxor $in7,$in7,v31 - - bne Lctr32_enc8x_break # did $len-129 borrow? - - vcipherlast $in0,$out0,$in0 - vcipherlast $in1,$out1,$in1 - vadduqm $out1,$ivec,$one # counter values ... 
- vcipherlast $in2,$out2,$in2 - vadduqm $out2,$ivec,$two - vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0] - vcipherlast $in3,$out3,$in3 - vadduqm $out3,$out1,$two - vxor $out1,$out1,$rndkey0 - vcipherlast $in4,$out4,$in4 - vadduqm $out4,$out2,$two - vxor $out2,$out2,$rndkey0 - vcipherlast $in5,$out5,$in5 - vadduqm $out5,$out3,$two - vxor $out3,$out3,$rndkey0 - vcipherlast $in6,$out6,$in6 - vadduqm $out6,$out4,$two - vxor $out4,$out4,$rndkey0 - vcipherlast $in7,$out7,$in7 - vadduqm $out7,$out5,$two - vxor $out5,$out5,$rndkey0 - le?vperm $in0,$in0,$in0,$inpperm - vadduqm $ivec,$out6,$two # next counter value - vxor $out6,$out6,$rndkey0 - le?vperm $in1,$in1,$in1,$inpperm - vxor $out7,$out7,$rndkey0 - mtctr $rounds - - vcipher $out0,$out0,v24 - stvx_u $in0,$x00,$out - le?vperm $in2,$in2,$in2,$inpperm - vcipher $out1,$out1,v24 - stvx_u $in1,$x10,$out - le?vperm $in3,$in3,$in3,$inpperm - vcipher $out2,$out2,v24 - stvx_u $in2,$x20,$out - le?vperm $in4,$in4,$in4,$inpperm - vcipher $out3,$out3,v24 - stvx_u $in3,$x30,$out - le?vperm $in5,$in5,$in5,$inpperm - vcipher $out4,$out4,v24 - stvx_u $in4,$x40,$out - le?vperm $in6,$in6,$in6,$inpperm - vcipher $out5,$out5,v24 - stvx_u $in5,$x50,$out - le?vperm $in7,$in7,$in7,$inpperm - vcipher $out6,$out6,v24 - stvx_u $in6,$x60,$out - vcipher $out7,$out7,v24 - stvx_u $in7,$x70,$out - addi $out,$out,0x80 - - b Loop_ctr32_enc8x_middle - -.align 5 -Lctr32_enc8x_break: - cmpwi $len,-0x60 - blt Lctr32_enc8x_one - nop - beq Lctr32_enc8x_two - cmpwi $len,-0x40 - blt Lctr32_enc8x_three - nop - beq Lctr32_enc8x_four - cmpwi $len,-0x20 - blt Lctr32_enc8x_five - nop - beq Lctr32_enc8x_six - cmpwi $len,0x00 - blt Lctr32_enc8x_seven - -Lctr32_enc8x_eight: - vcipherlast $out0,$out0,$in0 - vcipherlast $out1,$out1,$in1 - vcipherlast $out2,$out2,$in2 - vcipherlast $out3,$out3,$in3 - vcipherlast $out4,$out4,$in4 - vcipherlast $out5,$out5,$in5 - vcipherlast $out6,$out6,$in6 - vcipherlast $out7,$out7,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - 
le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - le?vperm $out7,$out7,$out7,$inpperm - stvx_u $out6,$x60,$out - stvx_u $out7,$x70,$out - addi $out,$out,0x80 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_seven: - vcipherlast $out0,$out0,$in1 - vcipherlast $out1,$out1,$in2 - vcipherlast $out2,$out2,$in3 - vcipherlast $out3,$out3,$in4 - vcipherlast $out4,$out4,$in5 - vcipherlast $out5,$out5,$in6 - vcipherlast $out6,$out6,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - le?vperm $out6,$out6,$out6,$inpperm - stvx_u $out5,$x50,$out - stvx_u $out6,$x60,$out - addi $out,$out,0x70 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_six: - vcipherlast $out0,$out0,$in2 - vcipherlast $out1,$out1,$in3 - vcipherlast $out2,$out2,$in4 - vcipherlast $out3,$out3,$in5 - vcipherlast $out4,$out4,$in6 - vcipherlast $out5,$out5,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - le?vperm $out5,$out5,$out5,$inpperm - stvx_u $out4,$x40,$out - stvx_u $out5,$x50,$out - addi $out,$out,0x60 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_five: - vcipherlast $out0,$out0,$in3 - vcipherlast $out1,$out1,$in4 - vcipherlast 
$out2,$out2,$in5 - vcipherlast $out3,$out3,$in6 - vcipherlast $out4,$out4,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$inpperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_four: - vcipherlast $out0,$out0,$in4 - vcipherlast $out1,$out1,$in5 - vcipherlast $out2,$out2,$in6 - vcipherlast $out3,$out3,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$inpperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_three: - vcipherlast $out0,$out0,$in5 - vcipherlast $out1,$out1,$in6 - vcipherlast $out2,$out2,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - le?vperm $out2,$out2,$out2,$inpperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_two: - vcipherlast $out0,$out0,$in6 - vcipherlast $out1,$out1,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - le?vperm $out1,$out1,$out1,$inpperm - stvx_u $out0,$x00,$out - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - b Lctr32_enc8x_done - -.align 5 -Lctr32_enc8x_one: - vcipherlast $out0,$out0,$in7 - - le?vperm $out0,$out0,$out0,$inpperm - stvx_u $out0,0,$out - addi $out,$out,0x10 - -Lctr32_enc8x_done: - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $inpperm,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi 
r11,r11,32 - stvx $inpperm,r10,$sp - addi r10,r10,32 - stvx $inpperm,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x14,0,0x80,6,6,0 - .long 0 -.size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks -___ -}} }}} - -######################################################################### -{{{ # XTS procedures # -# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # -# const AES_KEY *key1, const AES_KEY *key2, # -# [const] unsigned char iv[16]); # -# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # -# input tweak value is assumed to be encrypted already, and last tweak # -# value, one suitable for consecutive call on same chunk of data, is # -# written back to original buffer. In addition, in "tweak chaining" # -# mode only complete input blocks are processed. 
# - -my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); -my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); -my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); -my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); -my $taillen = $key2; - - ($inp,$idx) = ($idx,$inp); # reassign - -$code.=<<___; -.globl .${prefix}_xts_encrypt - mr $inp,r3 # reassign - li r3,-1 - ${UCMP}i $len,16 - bltlr- - - lis r0,0xfff0 - mfspr r12,256 # save vrsave - li r11,0 - mtspr 256,r0 - - vspltisb $seven,0x07 # 0x070707..07 - le?lvsl $leperm,r11,r11 - le?vspltisb $tmp,0x0f - le?vxor $leperm,$leperm,$seven - - li $idx,15 - lvx $tweak,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $tweak,$tweak,$inptail,$inpperm - - neg r11,$inp - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inout,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ${UCMP}i $key2,0 # key2==NULL? 
- beq Lxts_enc_no_key2 - - ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - lvx $rndkey0,0,$key2 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - mtctr $rounds - -Ltweak_xts_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - bdnz Ltweak_xts_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $tweak,$tweak,$rndkey0 - - li $ivp,0 # don't chain the tweak - b Lxts_enc - -Lxts_enc_no_key2: - li $idx,-16 - and $len,$len,$idx # in "tweak chaining" - # mode only complete - # blocks are processed -Lxts_enc: - lvx $inptail,0,$inp - addi $inp,$inp,16 - - ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - vslb $eighty7,$seven,$seven # 0x808080..80 - vor $eighty7,$eighty7,$seven # 0x878787..87 - vspltisb $tmp,1 # 0x010101..01 - vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 - - ${UCMP}i $len,96 - bge _aesp8_xts_encrypt6x - - andi. 
$taillen,$len,15 - subic r0,$len,32 - subi $taillen,$taillen,16 - subfe r0,r0,r0 - and r0,r0,$taillen - add $inp,$inp,r0 - - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - mtctr $rounds - b Loop_xts_enc - -.align 5 -Loop_xts_enc: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_enc - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak - vcipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - addi $out,$out,16 - - subic. 
$len,$len,16 - beq Lxts_enc_done - - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - - subic r0,$len,32 - subfe r0,r0,r0 - and r0,r0,$taillen - add $inp,$inp,r0 - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $output,$output,$rndkey0 # just in case $len<16 - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - mtctr $rounds - ${UCMP}i $len,16 - bge Loop_xts_enc - - vxor $output,$output,$tweak - lvsr $inpperm,0,$len # $inpperm is no longer needed - vxor $inptail,$inptail,$inptail # $inptail is no longer needed - vspltisb $tmp,-1 - vperm $inptail,$inptail,$tmp,$inpperm - vsel $inout,$inout,$output,$inptail - - subi r11,$out,17 - subi $out,$out,16 - mtctr $len - li $len,16 -Loop_xts_enc_steal: - lbzu r0,1(r11) - stb r0,16(r11) - bdnz Loop_xts_enc_steal - - mtctr $rounds - b Loop_xts_enc # one more time... - -Lxts_enc_done: - ${UCMP}i $ivp,0 - beq Lxts_enc_ret - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_enc_ret: - mtspr 256,r12 # restore vrsave - li r3,0 - blr - .long 0 - .byte 0,12,0x04,0,0x80,6,6,0 - .long 0 -.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt - -.globl .${prefix}_xts_decrypt - mr $inp,r3 # reassign - li r3,-1 - ${UCMP}i $len,16 - bltlr- - - lis r0,0xfff8 - mfspr r12,256 # save vrsave - li r11,0 - mtspr 256,r0 - - andi. r0,$len,15 - neg r0,r0 - andi. 
r0,r0,16 - sub $len,$len,r0 - - vspltisb $seven,0x07 # 0x070707..07 - le?lvsl $leperm,r11,r11 - le?vspltisb $tmp,0x0f - le?vxor $leperm,$leperm,$seven - - li $idx,15 - lvx $tweak,0,$ivp # load [unaligned] iv - lvsl $inpperm,0,$ivp - lvx $inptail,$idx,$ivp - le?vxor $inpperm,$inpperm,$tmp - vperm $tweak,$tweak,$inptail,$inpperm - - neg r11,$inp - lvsr $inpperm,0,r11 # prepare for unaligned load - lvx $inout,0,$inp - addi $inp,$inp,15 # 15 is not typo - le?vxor $inpperm,$inpperm,$tmp - - ${UCMP}i $key2,0 # key2==NULL? - beq Lxts_dec_no_key2 - - ?lvsl $keyperm,0,$key2 # prepare for unaligned key - lwz $rounds,240($key2) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - lvx $rndkey0,0,$key2 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - mtctr $rounds - -Ltweak_xts_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipher $tweak,$tweak,$rndkey0 - lvx $rndkey0,$idx,$key2 - addi $idx,$idx,16 - bdnz Ltweak_xts_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vcipher $tweak,$tweak,$rndkey1 - lvx $rndkey1,$idx,$key2 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vcipherlast $tweak,$tweak,$rndkey0 - - li $ivp,0 # don't chain the tweak - b Lxts_dec - -Lxts_dec_no_key2: - neg $idx,$len - andi. 
$idx,$idx,15 - add $len,$len,$idx # in "tweak chaining" - # mode only complete - # blocks are processed -Lxts_dec: - lvx $inptail,0,$inp - addi $inp,$inp,16 - - ?lvsl $keyperm,0,$key1 # prepare for unaligned key - lwz $rounds,240($key1) - srwi $rounds,$rounds,1 - subi $rounds,$rounds,1 - li $idx,16 - - vslb $eighty7,$seven,$seven # 0x808080..80 - vor $eighty7,$eighty7,$seven # 0x878787..87 - vspltisb $tmp,1 # 0x010101..01 - vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 - - ${UCMP}i $len,96 - bge _aesp8_xts_decrypt6x - - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - mtctr $rounds - - ${UCMP}i $len,16 - blt Ltail_xts_dec - be?b Loop_xts_dec - -.align 5 -Loop_xts_dec: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_dec - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak - vncipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - addi $out,$out,16 - - subic. 
$len,$len,16 - beq Lxts_dec_done - - vmr $inout,$inptail - lvx $inptail,0,$inp - addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $inout,$inout,$tweak - vxor $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - mtctr $rounds - ${UCMP}i $len,16 - bge Loop_xts_dec - -Ltail_xts_dec: - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak1,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak1,$tweak1,$tmp - - subi $inp,$inp,16 - add $inp,$inp,$len - - vxor $inout,$inout,$tweak # :-( - vxor $inout,$inout,$tweak1 # :-) - -Loop_xts_dec_short: - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vncipher $inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - bdnz Loop_xts_dec_short - - ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm - vncipher $inout,$inout,$rndkey1 - lvx $rndkey1,$idx,$key1 - li $idx,16 - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - vxor $rndkey0,$rndkey0,$tweak1 - vncipherlast $output,$inout,$rndkey0 - - le?vperm $tmp,$output,$output,$leperm - be?nop - le?stvx_u $tmp,0,$out - be?stvx_u $output,0,$out - - vmr $inout,$inptail - lvx $inptail,0,$inp - #addi $inp,$inp,16 - lvx $rndkey0,0,$key1 - lvx $rndkey1,$idx,$key1 - addi $idx,$idx,16 - vperm $inout,$inout,$inptail,$inpperm - ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm - - lvsr $inpperm,0,$len # $inpperm is no longer needed - vxor $inptail,$inptail,$inptail # $inptail is no longer needed - vspltisb $tmp,-1 - vperm $inptail,$inptail,$tmp,$inpperm - vsel $inout,$inout,$output,$inptail - - vxor $rndkey0,$rndkey0,$tweak - vxor 
$inout,$inout,$rndkey0 - lvx $rndkey0,$idx,$key1 - addi $idx,$idx,16 - - subi r11,$out,1 - mtctr $len - li $len,16 -Loop_xts_dec_steal: - lbzu r0,1(r11) - stb r0,16(r11) - bdnz Loop_xts_dec_steal - - mtctr $rounds - b Loop_xts_dec # one more time... - -Lxts_dec_done: - ${UCMP}i $ivp,0 - beq Lxts_dec_ret - - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vsldoi $tmp,$tmp,$tmp,15 - vand $tmp,$tmp,$eighty7 - vxor $tweak,$tweak,$tmp - - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_dec_ret: - mtspr 256,r12 # restore vrsave - li r3,0 - blr - .long 0 - .byte 0,12,0x04,0,0x80,6,6,0 - .long 0 -.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt -___ -######################################################################### -{{ # Optimized XTS procedures # -my $key_=$key2; -my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); - $x00=0 if ($flavour =~ /osx/); -my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); -my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); -my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); -my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys - # v26-v31 last 6 round keys -my ($keyperm)=($out0); # aliases with "caller", redundant assignment -my $taillen=$x70; - -$code.=<<___; -.align 5 -_aesp8_xts_encrypt6x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - mflr r11 - li r7,`$FRAME+8*16+15` - li r3,`$FRAME+8*16+31` - $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) - stvx v20,r7,$sp # ABI says so - addi r7,r7,32 - stvx v21,r3,$sp - addi r3,r3,32 - stvx v22,r7,$sp - addi r7,r7,32 - stvx v23,r3,$sp - addi r3,r3,32 - stvx v24,r7,$sp - addi r7,r7,32 - stvx v25,r3,$sp - addi r3,r3,32 - stvx v26,r7,$sp - addi r7,r7,32 - stvx v27,r3,$sp - addi r3,r3,32 - stvx v28,r7,$sp - addi r7,r7,32 - stvx v29,r3,$sp - addi r3,r3,32 - stvx v30,r7,$sp - stvx v31,r3,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH 
r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - xxlor 2, 32+$eighty7, 32+$eighty7 - vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 - xxlor 1, 32+$eighty7, 32+$eighty7 - - # Load XOR Lconsts. - mr $x70, r6 - bl Lconsts - lxvw4x 0, $x40, r6 # load XOR contents - mr r6, $x70 - li $x70,0x70 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key1 # load key schedule - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - lvx v31,$x00,$key1 - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,$FRAME+15 - mtctr $rounds - -Load_xts_enc_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key1 - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_xts_enc_key - - lvx v26,$x10,$key1 - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key1 - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key1 - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,$FRAME+15 # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key1 - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key1 - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key1 - ?vperm v29,v29,v30,$keyperm - lvx $twk5,$x70,$key1 # borrow $twk5 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$twk5,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - # Switch to use the following codes with 0x010101..87 to generate tweak. 
- # eighty7 = 0x010101..87 - # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits - # vand tmp, tmp, eighty7 # last byte with carry - # vaddubm tweak, tweak, tweak # left shift 1 bit (x2) - # xxlor vsx, 0, 0 - # vpermxor tweak, tweak, tmp, vsx - - vperm $in0,$inout,$inptail,$inpperm - subi $inp,$inp,31 # undo "caller" - vxor $twk0,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vand $tmp,$tmp,$eighty7 - vxor $out0,$in0,$twk0 - xxlor 32+$in1, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in1 - - lvx_u $in1,$x10,$inp - vxor $twk1,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in1,$in1,$in1,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out1,$in1,$twk1 - xxlor 32+$in2, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in2 - - lvx_u $in2,$x20,$inp - andi. $taillen,$len,15 - vxor $twk2,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in2,$in2,$in2,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out2,$in2,$twk2 - xxlor 32+$in3, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in3 - - lvx_u $in3,$x30,$inp - sub $len,$len,$taillen - vxor $twk3,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in3,$in3,$in3,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out3,$in3,$twk3 - xxlor 32+$in4, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in4 - - lvx_u $in4,$x40,$inp - subi $len,$len,0x60 - vxor $twk4,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in4,$in4,$in4,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out4,$in4,$twk4 - xxlor 32+$in5, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in5 - - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - vxor $twk5,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in5,$in5,$in5,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out5,$in5,$twk5 - xxlor 32+$in0, 0, 0 - vpermxor $tweak, 
$tweak, $tmp, $in0 - - vxor v31,v31,$rndkey0 - mtctr $rounds - b Loop_xts_enc6x - -.align 5 -Loop_xts_enc6x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_enc6x - - xxlor 32+$eighty7, 1, 1 # 0x010101..87 - - subic $len,$len,96 # $len-=96 - vxor $in0,$twk0,v31 # xor with last round key - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk0,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - vcipher $out5,$out5,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vand $tmp,$tmp,$eighty7 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - xxlor 32+$in1, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in1 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vxor $in1,$twk1,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk1,$tweak,$rndkey0 - vcipher $out4,$out4,v25 - vcipher $out5,$out5,v25 - - and r0,r0,$len - vaddubm $tweak,$tweak,$tweak - vcipher $out0,$out0,v26 - vcipher $out1,$out1,v26 - vand $tmp,$tmp,$eighty7 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - xxlor 32+$in2, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in2 - vcipher $out4,$out4,v26 - vcipher $out5,$out5,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in5 are loaded - # with last "words" - vxor $in2,$twk2,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk2,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vcipher $out0,$out0,v27 - vcipher $out1,$out1,v27 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vand $tmp,$tmp,$eighty7 - vcipher $out4,$out4,v27 - 
vcipher $out5,$out5,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - xxlor 32+$in3, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in3 - vcipher $out0,$out0,v28 - vcipher $out1,$out1,v28 - vxor $in3,$twk3,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk3,$tweak,$rndkey0 - vcipher $out2,$out2,v28 - vcipher $out3,$out3,v28 - vaddubm $tweak,$tweak,$tweak - vcipher $out4,$out4,v28 - vcipher $out5,$out5,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vand $tmp,$tmp,$eighty7 - - vcipher $out0,$out0,v29 - vcipher $out1,$out1,v29 - xxlor 32+$in4, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in4 - vcipher $out2,$out2,v29 - vcipher $out3,$out3,v29 - vxor $in4,$twk4,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk4,$tweak,$rndkey0 - vcipher $out4,$out4,v29 - vcipher $out5,$out5,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vaddubm $tweak,$tweak,$tweak - - vcipher $out0,$out0,v30 - vcipher $out1,$out1,v30 - vand $tmp,$tmp,$eighty7 - vcipher $out2,$out2,v30 - vcipher $out3,$out3,v30 - xxlor 32+$in5, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in5 - vcipher $out4,$out4,v30 - vcipher $out5,$out5,v30 - vxor $in5,$twk5,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk5,$tweak,$rndkey0 - - vcipherlast $out0,$out0,$in0 - lvx_u $in0,$x00,$inp # load next input block - vaddubm $tweak,$tweak,$tweak - vcipherlast $out1,$out1,$in1 - lvx_u $in1,$x10,$inp - vcipherlast $out2,$out2,$in2 - le?vperm $in0,$in0,$in0,$leperm - lvx_u $in2,$x20,$inp - vand $tmp,$tmp,$eighty7 - vcipherlast $out3,$out3,$in3 - le?vperm $in1,$in1,$in1,$leperm - lvx_u $in3,$x30,$inp - vcipherlast $out4,$out4,$in4 - le?vperm $in2,$in2,$in2,$leperm - lvx_u $in4,$x40,$inp - xxlor 10, 32+$in0, 32+$in0 - xxlor 32+$in0, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in0 - xxlor 32+$in0, 10, 10 - vcipherlast $tmp,$out5,$in5 # last block might be needed - # in stealing mode - le?vperm $in3,$in3,$in3,$leperm - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - le?vperm $in4,$in4,$in4,$leperm - le?vperm 
$in5,$in5,$in5,$leperm - - le?vperm $out0,$out0,$out0,$leperm - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk0 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $out1,$in1,$twk1 - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$twk2 - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$twk3 - le?vperm $out5,$tmp,$tmp,$leperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$twk4 - le?stvx_u $out5,$x50,$out - be?stvx_u $tmp, $x50,$out - vxor $out5,$in5,$twk5 - addi $out,$out,0x60 - - mtctr $rounds - beq Loop_xts_enc6x # did $len-=96 borrow? - - xxlor 32+$eighty7, 2, 2 # 0x010101..87 - - addic. $len,$len,0x60 - beq Lxts_enc6x_zero - cmpwi $len,0x20 - blt Lxts_enc6x_one - nop - beq Lxts_enc6x_two - cmpwi $len,0x40 - blt Lxts_enc6x_three - nop - beq Lxts_enc6x_four - -Lxts_enc6x_five: - vxor $out0,$in1,$twk0 - vxor $out1,$in2,$twk1 - vxor $out2,$in3,$twk2 - vxor $out3,$in4,$twk3 - vxor $out4,$in5,$twk4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk5 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $tmp,$out4,$twk5 # last block prep for stealing - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_four: - vxor $out0,$in2,$twk0 - vxor $out1,$in3,$twk1 - vxor $out2,$in4,$twk2 - vxor $out3,$in5,$twk3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk4 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $tmp,$out3,$twk4 # last block prep for stealing - le?vperm 
$out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_three: - vxor $out0,$in3,$twk0 - vxor $out1,$in4,$twk1 - vxor $out2,$in5,$twk2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk3 # unused tweak - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $tmp,$out2,$twk3 # last block prep for stealing - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_two: - vxor $out0,$in4,$twk0 - vxor $out1,$in5,$twk1 - vxor $out2,$out2,$out2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_enc5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk2 # unused tweak - vxor $tmp,$out1,$twk2 # last block prep for stealing - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_one: - vxor $out0,$in5,$twk0 - nop -Loop_xts_enc1x: - vcipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_enc1x - - add $inp,$inp,$taillen - cmpwi $taillen,0 - vcipher $out0,$out0,v24 - - subi $inp,$inp,16 - vcipher $out0,$out0,v25 - - lvsr $inpperm,0,$taillen - vcipher $out0,$out0,v26 - - lvx_u $in0,0,$inp - vcipher $out0,$out0,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vcipher $out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vcipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk0,$twk0,v31 - - le?vperm $in0,$in0,$in0,$leperm - vcipher $out0,$out0,v30 - - vperm $in0,$in0,$in0,$inpperm - vcipherlast $out0,$out0,$twk0 - - vmr $twk0,$twk1 # unused tweak - vxor 
$tmp,$out0,$twk1 # last block prep for stealing - le?vperm $out0,$out0,$out0,$leperm - stvx_u $out0,$x00,$out # store output - addi $out,$out,0x10 - bne Lxts_enc6x_steal - b Lxts_enc6x_done - -.align 4 -Lxts_enc6x_zero: - cmpwi $taillen,0 - beq Lxts_enc6x_done - - add $inp,$inp,$taillen - subi $inp,$inp,16 - lvx_u $in0,0,$inp - lvsr $inpperm,0,$taillen # $in5 is no more - le?vperm $in0,$in0,$in0,$leperm - vperm $in0,$in0,$in0,$inpperm - vxor $tmp,$tmp,$twk0 -Lxts_enc6x_steal: - vxor $in0,$in0,$twk0 - vxor $out0,$out0,$out0 - vspltisb $out1,-1 - vperm $out0,$out0,$out1,$inpperm - vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? - - subi r30,$out,17 - subi $out,$out,16 - mtctr $taillen -Loop_xts_enc6x_steal: - lbzu r0,1(r30) - stb r0,16(r30) - bdnz Loop_xts_enc6x_steal - - li $taillen,0 - mtctr $rounds - b Loop_xts_enc1x # one more time... - -.align 4 -Lxts_enc6x_done: - ${UCMP}i $ivp,0 - beq Lxts_enc6x_ret - - vxor $tweak,$twk0,$rndkey0 - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_enc6x_ret: - mtlr r11 - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $seven,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - 
$POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x04,1,0x80,6,6,0 - .long 0 - -.align 5 -_aesp8_xts_enc5x: - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - lvx v25,$x10,$key_ # round[4] - bdnz _aesp8_xts_enc5x - - add $inp,$inp,$taillen - cmpwi $taillen,0 - vcipher $out0,$out0,v24 - vcipher $out1,$out1,v24 - vcipher $out2,$out2,v24 - vcipher $out3,$out3,v24 - vcipher $out4,$out4,v24 - - subi $inp,$inp,16 - vcipher $out0,$out0,v25 - vcipher $out1,$out1,v25 - vcipher $out2,$out2,v25 - vcipher $out3,$out3,v25 - vcipher $out4,$out4,v25 - vxor $twk0,$twk0,v31 - - vcipher $out0,$out0,v26 - lvsr $inpperm,r0,$taillen # $in5 is no more - vcipher $out1,$out1,v26 - vcipher $out2,$out2,v26 - vcipher $out3,$out3,v26 - vcipher $out4,$out4,v26 - vxor $in1,$twk1,v31 - - vcipher $out0,$out0,v27 - lvx_u $in0,0,$inp - vcipher $out1,$out1,v27 - vcipher $out2,$out2,v27 - vcipher $out3,$out3,v27 - vcipher $out4,$out4,v27 - vxor $in2,$twk2,v31 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vcipher $out0,$out0,v28 - vcipher $out1,$out1,v28 - vcipher $out2,$out2,v28 - vcipher $out3,$out3,v28 - vcipher $out4,$out4,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vxor $in3,$twk3,v31 - - vcipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$leperm - vcipher $out1,$out1,v29 - vcipher $out2,$out2,v29 - vcipher $out3,$out3,v29 - vcipher $out4,$out4,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $in4,$twk4,v31 - - vcipher $out0,$out0,v30 - vperm $in0,$in0,$in0,$inpperm - vcipher $out1,$out1,v30 - vcipher $out2,$out2,v30 - vcipher $out3,$out3,v30 - vcipher $out4,$out4,v30 - - vcipherlast 
$out0,$out0,$twk0 - vcipherlast $out1,$out1,$in1 - vcipherlast $out2,$out2,$in2 - vcipherlast $out3,$out3,$in3 - vcipherlast $out4,$out4,$in4 - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 - -.align 5 -_aesp8_xts_decrypt6x: - $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) - mflr r11 - li r7,`$FRAME+8*16+15` - li r3,`$FRAME+8*16+31` - $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) - stvx v20,r7,$sp # ABI says so - addi r7,r7,32 - stvx v21,r3,$sp - addi r3,r3,32 - stvx v22,r7,$sp - addi r7,r7,32 - stvx v23,r3,$sp - addi r3,r3,32 - stvx v24,r7,$sp - addi r7,r7,32 - stvx v25,r3,$sp - addi r3,r3,32 - stvx v26,r7,$sp - addi r7,r7,32 - stvx v27,r3,$sp - addi r3,r3,32 - stvx v28,r7,$sp - addi r7,r7,32 - stvx v29,r3,$sp - addi r3,r3,32 - stvx v30,r7,$sp - stvx v31,r3,$sp - li r0,-1 - stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave - li $x10,0x10 - $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) - li $x20,0x20 - $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) - li $x30,0x30 - $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) - li $x40,0x40 - $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) - li $x50,0x50 - $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) - li $x60,0x60 - $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) - li $x70,0x70 - mtspr 256,r0 - - xxlor 2, 32+$eighty7, 32+$eighty7 - vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87 - xxlor 1, 32+$eighty7, 32+$eighty7 - - # Load XOR Lconsts. 
- mr $x70, r6 - bl Lconsts - lxvw4x 0, $x40, r6 # load XOR contents - mr r6, $x70 - li $x70,0x70 - - subi $rounds,$rounds,3 # -4 in total - - lvx $rndkey0,$x00,$key1 # load key schedule - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - lvx v31,$x00,$key1 - ?vperm $rndkey0,$rndkey0,v30,$keyperm - addi $key_,$sp,$FRAME+15 - mtctr $rounds - -Load_xts_dec_key: - ?vperm v24,v30,v31,$keyperm - lvx v30,$x10,$key1 - addi $key1,$key1,0x20 - stvx v24,$x00,$key_ # off-load round[1] - ?vperm v25,v31,v30,$keyperm - lvx v31,$x00,$key1 - stvx v25,$x10,$key_ # off-load round[2] - addi $key_,$key_,0x20 - bdnz Load_xts_dec_key - - lvx v26,$x10,$key1 - ?vperm v24,v30,v31,$keyperm - lvx v27,$x20,$key1 - stvx v24,$x00,$key_ # off-load round[3] - ?vperm v25,v31,v26,$keyperm - lvx v28,$x30,$key1 - stvx v25,$x10,$key_ # off-load round[4] - addi $key_,$sp,$FRAME+15 # rewind $key_ - ?vperm v26,v26,v27,$keyperm - lvx v29,$x40,$key1 - ?vperm v27,v27,v28,$keyperm - lvx v30,$x50,$key1 - ?vperm v28,v28,v29,$keyperm - lvx v31,$x60,$key1 - ?vperm v29,v29,v30,$keyperm - lvx $twk5,$x70,$key1 # borrow $twk5 - ?vperm v30,v30,v31,$keyperm - lvx v24,$x00,$key_ # pre-load round[1] - ?vperm v31,v31,$twk5,$keyperm - lvx v25,$x10,$key_ # pre-load round[2] - - vperm $in0,$inout,$inptail,$inpperm - subi $inp,$inp,31 # undo "caller" - vxor $twk0,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - vand $tmp,$tmp,$eighty7 - vxor $out0,$in0,$twk0 - xxlor 32+$in1, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in1 - - lvx_u $in1,$x10,$inp - vxor $twk1,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in1,$in1,$in1,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out1,$in1,$twk1 - xxlor 32+$in2, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in2 - - lvx_u $in2,$x20,$inp - andi. 
$taillen,$len,15 - vxor $twk2,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in2,$in2,$in2,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out2,$in2,$twk2 - xxlor 32+$in3, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in3 - - lvx_u $in3,$x30,$inp - sub $len,$len,$taillen - vxor $twk3,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in3,$in3,$in3,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out3,$in3,$twk3 - xxlor 32+$in4, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in4 - - lvx_u $in4,$x40,$inp - subi $len,$len,0x60 - vxor $twk4,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in4,$in4,$in4,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out4,$in4,$twk4 - xxlor 32+$in5, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in5 - - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - vxor $twk5,$tweak,$rndkey0 - vsrab $tmp,$tweak,$seven # next tweak value - vaddubm $tweak,$tweak,$tweak - le?vperm $in5,$in5,$in5,$leperm - vand $tmp,$tmp,$eighty7 - vxor $out5,$in5,$twk5 - xxlor 32+$in0, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in0 - - vxor v31,v31,$rndkey0 - mtctr $rounds - b Loop_xts_dec6x - -.align 5 -Loop_xts_dec6x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_dec6x - - xxlor 32+$eighty7, 1, 1 # 0x010101..87 - - subic $len,$len,96 # $len-=96 - vxor $in0,$twk0,v31 # xor with last round key - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk0,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vncipher $out2,$out2,v24 
- vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - vncipher $out5,$out5,v24 - - subfe. r0,r0,r0 # borrow?-1:0 - vand $tmp,$tmp,$eighty7 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - xxlor 32+$in1, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in1 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vxor $in1,$twk1,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk1,$tweak,$rndkey0 - vncipher $out4,$out4,v25 - vncipher $out5,$out5,v25 - - and r0,r0,$len - vaddubm $tweak,$tweak,$tweak - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vand $tmp,$tmp,$eighty7 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - xxlor 32+$in2, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in2 - vncipher $out4,$out4,v26 - vncipher $out5,$out5,v26 - - add $inp,$inp,r0 # $inp is adjusted in such - # way that at exit from the - # loop inX-in5 are loaded - # with last "words" - vxor $in2,$twk2,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk2,$tweak,$rndkey0 - vaddubm $tweak,$tweak,$tweak - vncipher $out0,$out0,v27 - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vand $tmp,$tmp,$eighty7 - vncipher $out4,$out4,v27 - vncipher $out5,$out5,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - xxlor 32+$in3, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in3 - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vxor $in3,$twk3,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk3,$tweak,$rndkey0 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vaddubm $tweak,$tweak,$tweak - vncipher $out4,$out4,v28 - vncipher $out5,$out5,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vand $tmp,$tmp,$eighty7 - - vncipher $out0,$out0,v29 - vncipher $out1,$out1,v29 - xxlor 32+$in4, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in4 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vxor $in4,$twk4,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk4,$tweak,$rndkey0 - vncipher $out4,$out4,v29 - vncipher $out5,$out5,v29 - lvx 
v25,$x10,$key_ # re-pre-load round[2] - vaddubm $tweak,$tweak,$tweak - - vncipher $out0,$out0,v30 - vncipher $out1,$out1,v30 - vand $tmp,$tmp,$eighty7 - vncipher $out2,$out2,v30 - vncipher $out3,$out3,v30 - xxlor 32+$in5, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in5 - vncipher $out4,$out4,v30 - vncipher $out5,$out5,v30 - vxor $in5,$twk5,v31 - vsrab $tmp,$tweak,$seven # next tweak value - vxor $twk5,$tweak,$rndkey0 - - vncipherlast $out0,$out0,$in0 - lvx_u $in0,$x00,$inp # load next input block - vaddubm $tweak,$tweak,$tweak - vncipherlast $out1,$out1,$in1 - lvx_u $in1,$x10,$inp - vncipherlast $out2,$out2,$in2 - le?vperm $in0,$in0,$in0,$leperm - lvx_u $in2,$x20,$inp - vand $tmp,$tmp,$eighty7 - vncipherlast $out3,$out3,$in3 - le?vperm $in1,$in1,$in1,$leperm - lvx_u $in3,$x30,$inp - vncipherlast $out4,$out4,$in4 - le?vperm $in2,$in2,$in2,$leperm - lvx_u $in4,$x40,$inp - xxlor 10, 32+$in0, 32+$in0 - xxlor 32+$in0, 0, 0 - vpermxor $tweak, $tweak, $tmp, $in0 - xxlor 32+$in0, 10, 10 - vncipherlast $out5,$out5,$in5 - le?vperm $in3,$in3,$in3,$leperm - lvx_u $in5,$x50,$inp - addi $inp,$inp,0x60 - le?vperm $in4,$in4,$in4,$leperm - le?vperm $in5,$in5,$in5,$leperm - - le?vperm $out0,$out0,$out0,$leperm - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk0 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - vxor $out1,$in1,$twk1 - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - vxor $out2,$in2,$twk2 - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - vxor $out3,$in3,$twk3 - le?vperm $out5,$out5,$out5,$leperm - stvx_u $out4,$x40,$out - vxor $out4,$in4,$twk4 - stvx_u $out5,$x50,$out - vxor $out5,$in5,$twk5 - addi $out,$out,0x60 - - mtctr $rounds - beq Loop_xts_dec6x # did $len-=96 borrow? - - xxlor 32+$eighty7, 2, 2 # 0x010101..87 - - addic. 
$len,$len,0x60 - beq Lxts_dec6x_zero - cmpwi $len,0x20 - blt Lxts_dec6x_one - nop - beq Lxts_dec6x_two - cmpwi $len,0x40 - blt Lxts_dec6x_three - nop - beq Lxts_dec6x_four - -Lxts_dec6x_five: - vxor $out0,$in1,$twk0 - vxor $out1,$in2,$twk1 - vxor $out2,$in3,$twk2 - vxor $out3,$in4,$twk3 - vxor $out4,$in5,$twk4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk5 # unused tweak - vxor $twk1,$tweak,$rndkey0 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk1 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - le?vperm $out4,$out4,$out4,$leperm - stvx_u $out3,$x30,$out - stvx_u $out4,$x40,$out - addi $out,$out,0x50 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_four: - vxor $out0,$in2,$twk0 - vxor $out1,$in3,$twk1 - vxor $out2,$in4,$twk2 - vxor $out3,$in5,$twk3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk4 # unused tweak - vmr $twk1,$twk5 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk5 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - le?vperm $out3,$out3,$out3,$leperm - stvx_u $out2,$x20,$out - stvx_u $out3,$x30,$out - addi $out,$out,0x40 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_three: - vxor $out0,$in3,$twk0 - vxor $out1,$in4,$twk1 - vxor $out2,$in5,$twk2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk3 # unused tweak - vmr $twk1,$twk4 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk4 - le?vperm $out2,$out2,$out2,$leperm - stvx_u $out1,$x10,$out - stvx_u $out2,$x20,$out - addi $out,$out,0x30 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_two: - vxor $out0,$in4,$twk0 - vxor $out1,$in5,$twk1 - vxor 
$out2,$out2,$out2 - vxor $out3,$out3,$out3 - vxor $out4,$out4,$out4 - - bl _aesp8_xts_dec5x - - le?vperm $out0,$out0,$out0,$leperm - vmr $twk0,$twk2 # unused tweak - vmr $twk1,$twk3 - le?vperm $out1,$out1,$out1,$leperm - stvx_u $out0,$x00,$out # store output - vxor $out0,$in0,$twk3 - stvx_u $out1,$x10,$out - addi $out,$out,0x20 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_one: - vxor $out0,$in5,$twk0 - nop -Loop_xts_dec1x: - vncipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Loop_xts_dec1x - - subi r0,$taillen,1 - vncipher $out0,$out0,v24 - - andi. r0,r0,16 - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - - sub $inp,$inp,r0 - vncipher $out0,$out0,v26 - - lvx_u $in0,0,$inp - vncipher $out0,$out0,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vncipher $out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk0,$twk0,v31 - - le?vperm $in0,$in0,$in0,$leperm - vncipher $out0,$out0,v30 - - mtctr $rounds - vncipherlast $out0,$out0,$twk0 - - vmr $twk0,$twk1 # unused tweak - vmr $twk1,$twk2 - le?vperm $out0,$out0,$out0,$leperm - stvx_u $out0,$x00,$out # store output - addi $out,$out,0x10 - vxor $out0,$in0,$twk2 - bne Lxts_dec6x_steal - b Lxts_dec6x_done - -.align 4 -Lxts_dec6x_zero: - cmpwi $taillen,0 - beq Lxts_dec6x_done - - lvx_u $in0,0,$inp - le?vperm $in0,$in0,$in0,$leperm - vxor $out0,$in0,$twk1 -Lxts_dec6x_steal: - vncipher $out0,$out0,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - lvx v25,$x10,$key_ # round[4] - bdnz Lxts_dec6x_steal - - add $inp,$inp,$taillen - vncipher $out0,$out0,v24 - - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - - lvx_u $in0,0,$inp - vncipher $out0,$out0,v26 - - lvsr $inpperm,0,$taillen # $in5 is no more - vncipher $out0,$out0,v27 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vncipher 
$out0,$out0,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - - vncipher $out0,$out0,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $twk1,$twk1,v31 - - le?vperm $in0,$in0,$in0,$leperm - vncipher $out0,$out0,v30 - - vperm $in0,$in0,$in0,$inpperm - vncipherlast $tmp,$out0,$twk1 - - le?vperm $out0,$tmp,$tmp,$leperm - le?stvx_u $out0,0,$out - be?stvx_u $tmp,0,$out - - vxor $out0,$out0,$out0 - vspltisb $out1,-1 - vperm $out0,$out0,$out1,$inpperm - vsel $out0,$in0,$tmp,$out0 - vxor $out0,$out0,$twk0 - - subi r30,$out,1 - mtctr $taillen -Loop_xts_dec6x_steal: - lbzu r0,1(r30) - stb r0,16(r30) - bdnz Loop_xts_dec6x_steal - - li $taillen,0 - mtctr $rounds - b Loop_xts_dec1x # one more time... - -.align 4 -Lxts_dec6x_done: - ${UCMP}i $ivp,0 - beq Lxts_dec6x_ret - - vxor $tweak,$twk0,$rndkey0 - le?vperm $tweak,$tweak,$tweak,$leperm - stvx_u $tweak,0,$ivp - -Lxts_dec6x_ret: - mtlr r11 - li r10,`$FRAME+15` - li r11,`$FRAME+31` - stvx $seven,r10,$sp # wipe copies of round keys - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - stvx $seven,r10,$sp - addi r10,r10,32 - stvx $seven,r11,$sp - addi r11,r11,32 - - mtspr 256,$vrsave - lvx v20,r10,$sp # ABI says so - addi r10,r10,32 - lvx v21,r11,$sp - addi r11,r11,32 - lvx v22,r10,$sp - addi r10,r10,32 - lvx v23,r11,$sp - addi r11,r11,32 - lvx v24,r10,$sp - addi r10,r10,32 - lvx v25,r11,$sp - addi r11,r11,32 - lvx v26,r10,$sp - addi r10,r10,32 - lvx v27,r11,$sp - addi r11,r11,32 - lvx v28,r10,$sp - addi r10,r10,32 - lvx v29,r11,$sp - addi r11,r11,32 - lvx v30,r10,$sp - lvx v31,r11,$sp - $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) - $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) - $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) - $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) - $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) - $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) - addi 
$sp,$sp,`$FRAME+21*16+6*$SIZE_T` - blr - .long 0 - .byte 0,12,0x04,1,0x80,6,6,0 - .long 0 - -.align 5 -_aesp8_xts_dec5x: - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - lvx v24,$x20,$key_ # round[3] - addi $key_,$key_,0x20 - - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - lvx v25,$x10,$key_ # round[4] - bdnz _aesp8_xts_dec5x - - subi r0,$taillen,1 - vncipher $out0,$out0,v24 - vncipher $out1,$out1,v24 - vncipher $out2,$out2,v24 - vncipher $out3,$out3,v24 - vncipher $out4,$out4,v24 - - andi. r0,r0,16 - cmpwi $taillen,0 - vncipher $out0,$out0,v25 - vncipher $out1,$out1,v25 - vncipher $out2,$out2,v25 - vncipher $out3,$out3,v25 - vncipher $out4,$out4,v25 - vxor $twk0,$twk0,v31 - - sub $inp,$inp,r0 - vncipher $out0,$out0,v26 - vncipher $out1,$out1,v26 - vncipher $out2,$out2,v26 - vncipher $out3,$out3,v26 - vncipher $out4,$out4,v26 - vxor $in1,$twk1,v31 - - vncipher $out0,$out0,v27 - lvx_u $in0,0,$inp - vncipher $out1,$out1,v27 - vncipher $out2,$out2,v27 - vncipher $out3,$out3,v27 - vncipher $out4,$out4,v27 - vxor $in2,$twk2,v31 - - addi $key_,$sp,$FRAME+15 # rewind $key_ - vncipher $out0,$out0,v28 - vncipher $out1,$out1,v28 - vncipher $out2,$out2,v28 - vncipher $out3,$out3,v28 - vncipher $out4,$out4,v28 - lvx v24,$x00,$key_ # re-pre-load round[1] - vxor $in3,$twk3,v31 - - vncipher $out0,$out0,v29 - le?vperm $in0,$in0,$in0,$leperm - vncipher $out1,$out1,v29 - vncipher $out2,$out2,v29 - vncipher $out3,$out3,v29 - vncipher $out4,$out4,v29 - lvx v25,$x10,$key_ # re-pre-load round[2] - vxor $in4,$twk4,v31 - - vncipher $out0,$out0,v30 - vncipher $out1,$out1,v30 - vncipher $out2,$out2,v30 - vncipher $out3,$out3,v30 - vncipher $out4,$out4,v30 - - vncipherlast $out0,$out0,$twk0 - vncipherlast $out1,$out1,$in1 - vncipherlast $out2,$out2,$in2 - vncipherlast $out3,$out3,$in3 - vncipherlast $out4,$out4,$in4 - 
mtctr $rounds - blr - .long 0 - .byte 0,12,0x14,0,0,0,0,0 -___ -}} }}} - -my $consts=1; -foreach(split("\n",$code)) { - s/\`([^\`]*)\`/eval($1)/geo; - - # constants table endian-specific conversion - if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) { - my $conv=$3; - my @bytes=(); - - # convert to endian-agnostic format - if ($1 eq "long") { - foreach (split(/,\s*/,$2)) { - my $l = /^0/?oct:int; - push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff; - } - } else { - @bytes = map(/^0/?oct:int,split(/,\s*/,$2)); - } - - # little-endian conversion - if ($flavour =~ /le$/o) { - SWITCH: for($conv) { - /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; - } - } - - #emit - print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n"; - next; - } - $consts=0 if (m/Lconsts:/o); # end of table - - # instructions prefixed with '?' are endian-specific and need - # to be adjusted accordingly... - if ($flavour =~ /le$/o) { # little-endian - s/le\?//o or - s/be\?/#be#/o or - s/\?lvsr/lvsl/o or - s/\?lvsl/lvsr/o or - s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or - s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or - s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o; - } else { # big-endian - s/le\?/#le#/o or - s/be\?//o or - s/\?([a-z]+)/$1/o; - } - - print $_,"\n"; -} - -close STDOUT; diff --git a/drivers/crypto/vmx/ghash.c b/drivers/crypto/vmx/ghash.c deleted file mode 100644 index 77eca20bc7..0000000000 --- a/drivers/crypto/vmx/ghash.c +++ /dev/null @@ -1,185 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * GHASH routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015, 2019 International Business Machines Inc. - * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - * - * Extended by Daniel Axtens <dja@axtens.net> to replace the fallback - * mechanism. 
The new approach is based on arm64 code, which is: - * Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org> - */ - -#include <linux/types.h> -#include <linux/err.h> -#include <linux/crypto.h> -#include <linux/delay.h> -#include <asm/simd.h> -#include <asm/switch_to.h> -#include <crypto/aes.h> -#include <crypto/ghash.h> -#include <crypto/scatterwalk.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/simd.h> -#include <crypto/b128ops.h> -#include "aesp8-ppc.h" - -void gcm_init_p8(u128 htable[16], const u64 Xi[2]); -void gcm_gmult_p8(u64 Xi[2], const u128 htable[16]); -void gcm_ghash_p8(u64 Xi[2], const u128 htable[16], - const u8 *in, size_t len); - -struct p8_ghash_ctx { - /* key used by vector asm */ - u128 htable[16]; - /* key used by software fallback */ - be128 key; -}; - -struct p8_ghash_desc_ctx { - u64 shash[2]; - u8 buffer[GHASH_DIGEST_SIZE]; - int bytes; -}; - -static int p8_ghash_init(struct shash_desc *desc) -{ - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - dctx->bytes = 0; - memset(dctx->shash, 0, GHASH_DIGEST_SIZE); - return 0; -} - -static int p8_ghash_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) -{ - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(tfm)); - - if (keylen != GHASH_BLOCK_SIZE) - return -EINVAL; - - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_init_p8(ctx->htable, (const u64 *) key); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - - memcpy(&ctx->key, key, GHASH_BLOCK_SIZE); - - return 0; -} - -static inline void __ghash_block(struct p8_ghash_ctx *ctx, - struct p8_ghash_desc_ctx *dctx) -{ - if (crypto_simd_usable()) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_ghash_p8(dctx->shash, ctx->htable, - dctx->buffer, GHASH_DIGEST_SIZE); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - } else { - crypto_xor((u8 *)dctx->shash, dctx->buffer, GHASH_BLOCK_SIZE); - 
gf128mul_lle((be128 *)dctx->shash, &ctx->key); - } -} - -static inline void __ghash_blocks(struct p8_ghash_ctx *ctx, - struct p8_ghash_desc_ctx *dctx, - const u8 *src, unsigned int srclen) -{ - if (crypto_simd_usable()) { - preempt_disable(); - pagefault_disable(); - enable_kernel_vsx(); - gcm_ghash_p8(dctx->shash, ctx->htable, - src, srclen); - disable_kernel_vsx(); - pagefault_enable(); - preempt_enable(); - } else { - while (srclen >= GHASH_BLOCK_SIZE) { - crypto_xor((u8 *)dctx->shash, src, GHASH_BLOCK_SIZE); - gf128mul_lle((be128 *)dctx->shash, &ctx->key); - srclen -= GHASH_BLOCK_SIZE; - src += GHASH_BLOCK_SIZE; - } - } -} - -static int p8_ghash_update(struct shash_desc *desc, - const u8 *src, unsigned int srclen) -{ - unsigned int len; - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - if (dctx->bytes) { - if (dctx->bytes + srclen < GHASH_DIGEST_SIZE) { - memcpy(dctx->buffer + dctx->bytes, src, - srclen); - dctx->bytes += srclen; - return 0; - } - memcpy(dctx->buffer + dctx->bytes, src, - GHASH_DIGEST_SIZE - dctx->bytes); - - __ghash_block(ctx, dctx); - - src += GHASH_DIGEST_SIZE - dctx->bytes; - srclen -= GHASH_DIGEST_SIZE - dctx->bytes; - dctx->bytes = 0; - } - len = srclen & ~(GHASH_DIGEST_SIZE - 1); - if (len) { - __ghash_blocks(ctx, dctx, src, len); - src += len; - srclen -= len; - } - if (srclen) { - memcpy(dctx->buffer, src, srclen); - dctx->bytes = srclen; - } - return 0; -} - -static int p8_ghash_final(struct shash_desc *desc, u8 *out) -{ - int i; - struct p8_ghash_ctx *ctx = crypto_tfm_ctx(crypto_shash_tfm(desc->tfm)); - struct p8_ghash_desc_ctx *dctx = shash_desc_ctx(desc); - - if (dctx->bytes) { - for (i = dctx->bytes; i < GHASH_DIGEST_SIZE; i++) - dctx->buffer[i] = 0; - __ghash_block(ctx, dctx); - dctx->bytes = 0; - } - memcpy(out, dctx->shash, GHASH_DIGEST_SIZE); - return 0; -} - -struct shash_alg p8_ghash_alg = { - .digestsize = GHASH_DIGEST_SIZE, - .init = 
p8_ghash_init, - .update = p8_ghash_update, - .final = p8_ghash_final, - .setkey = p8_ghash_setkey, - .descsize = sizeof(struct p8_ghash_desc_ctx) - + sizeof(struct ghash_desc_ctx), - .base = { - .cra_name = "ghash", - .cra_driver_name = "p8_ghash", - .cra_priority = 1000, - .cra_blocksize = GHASH_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct p8_ghash_ctx), - .cra_module = THIS_MODULE, - }, -}; diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl deleted file mode 100644 index 041e633c21..0000000000 --- a/drivers/crypto/vmx/ghashp8-ppc.pl +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 - -# This code is taken from the OpenSSL project but the author (Andy Polyakov) -# has relicensed it under the GPLv2. Therefore this program is free software; -# you can redistribute it and/or modify it under the terms of the GNU General -# Public License version 2 as published by the Free Software Foundation. -# -# The original headers, including the original license headers, are -# included below for completeness. - -# ==================================================================== -# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -# project. The module is, however, dual licensed under OpenSSL and -# CRYPTOGAMS licenses depending on where you obtain it. For further -# details see https://www.openssl.org/~appro/cryptogams/. -# ==================================================================== -# -# GHASH for PowerISA v2.07. -# -# July 2014 -# -# Accurate performance measurements are problematic, because it's -# always virtualized setup with possibly throttled processor. -# Relative comparison is therefore more informative. This initial -# version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x -# faster than "4-bit" integer-only compiler-generated 64-bit code. -# "Initial version" means that there is room for futher improvement. 
- -$flavour=shift; -$output =shift; - -if ($flavour =~ /64/) { - $SIZE_T=8; - $LRSAVE=2*$SIZE_T; - $STU="stdu"; - $POP="ld"; - $PUSH="std"; -} elsif ($flavour =~ /32/) { - $SIZE_T=4; - $LRSAVE=$SIZE_T; - $STU="stwu"; - $POP="lwz"; - $PUSH="stw"; -} else { die "nonsense $flavour"; } - -$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; -( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or -( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or -die "can't locate ppc-xlate.pl"; - -open STDOUT,"| $^X $xlate $flavour $output" || die "can't call $xlate: $!"; - -my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block - -my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); -my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); -my $vrsave="r12"; - -$code=<<___; -.machine "any" - -.text - -.globl .gcm_init_p8 - lis r0,0xfff0 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $H,0,r4 # load H - le?xor r7,r7,r7 - le?addi r7,r7,0x8 # need a vperm start with 08 - le?lvsr 5,0,r7 - le?vspltisb 6,0x0f - le?vxor 5,5,6 # set a b-endian mask - le?vperm $H,$H,$H,5 - - vspltisb $xC2,-16 # 0xf0 - vspltisb $t0,1 # one - vaddubm $xC2,$xC2,$xC2 # 0xe0 - vxor $zero,$zero,$zero - vor $xC2,$xC2,$t0 # 0xe1 - vsldoi $xC2,$xC2,$zero,15 # 0xe1... - vsldoi $t1,$zero,$t0,1 # ...1 - vaddubm $xC2,$xC2,$xC2 # 0xc2... - vspltisb $t2,7 - vor $xC2,$xC2,$t1 # 0xc2....01 - vspltb $t1,$H,0 # most significant byte - vsl $H,$H,$t0 # H<<=1 - vsrab $t1,$t1,$t2 # broadcast carry bit - vand $t1,$t1,$xC2 - vxor $H,$H,$t1 # twisted H - - vsldoi $H,$H,$H,8 # twist even more ... - vsldoi $xC2,$zero,$xC2,8 # 0xc2.0 - vsldoi $Hl,$zero,$H,8 # ... 
and split - vsldoi $Hh,$H,$zero,8 - - stvx_u $xC2,0,r3 # save pre-computed table - stvx_u $Hl,r8,r3 - stvx_u $H, r9,r3 - stvx_u $Hh,r10,r3 - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,2,0 - .long 0 -.size .gcm_init_p8,.-.gcm_init_p8 - -.globl .gcm_gmult_p8 - lis r0,0xfff8 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $IN,0,$Xip # load Xi - - lvx_u $Hl,r8,$Htbl # load pre-computed table - le?lvsl $lemask,r0,r0 - lvx_u $H, r9,$Htbl - le?vspltisb $t0,0x07 - lvx_u $Hh,r10,$Htbl - le?vxor $lemask,$lemask,$t0 - lvx_u $xC2,0,$Htbl - le?vperm $IN,$IN,$IN,$lemask - vxor $zero,$zero,$zero - - vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo - vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi - vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi - - vpmsumd $t2,$Xl,$xC2 # 1st phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - - vsldoi $t1,$Xl,$Xl,8 # 2nd phase - vpmsumd $Xl,$Xl,$xC2 - vxor $t1,$t1,$Xh - vxor $Xl,$Xl,$t1 - - le?vperm $Xl,$Xl,$Xl,$lemask - stvx_u $Xl,0,$Xip # write out Xi - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,2,0 - .long 0 -.size .gcm_gmult_p8,.-.gcm_gmult_p8 - -.globl .gcm_ghash_p8 - lis r0,0xfff8 - li r8,0x10 - mfspr $vrsave,256 - li r9,0x20 - mtspr 256,r0 - li r10,0x30 - lvx_u $Xl,0,$Xip # load Xi - - lvx_u $Hl,r8,$Htbl # load pre-computed table - le?lvsl $lemask,r0,r0 - lvx_u $H, r9,$Htbl - le?vspltisb $t0,0x07 - lvx_u $Hh,r10,$Htbl - le?vxor $lemask,$lemask,$t0 - lvx_u $xC2,0,$Htbl - le?vperm $Xl,$Xl,$Xl,$lemask - vxor $zero,$zero,$zero - - lvx_u $IN,0,$inp - addi $inp,$inp,16 - subi $len,$len,16 - le?vperm $IN,$IN,$IN,$lemask - vxor $IN,$IN,$Xl - b Loop - -.align 5 -Loop: - subic $len,$len,16 - vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo - subfe. 
r0,r0,r0 # borrow?-1:0 - vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi - and r0,r0,$len - vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi - add $inp,$inp,r0 - - vpmsumd $t2,$Xl,$xC2 # 1st phase - - vsldoi $t0,$Xm,$zero,8 - vsldoi $t1,$zero,$Xm,8 - vxor $Xl,$Xl,$t0 - vxor $Xh,$Xh,$t1 - - vsldoi $Xl,$Xl,$Xl,8 - vxor $Xl,$Xl,$t2 - lvx_u $IN,0,$inp - addi $inp,$inp,16 - - vsldoi $t1,$Xl,$Xl,8 # 2nd phase - vpmsumd $Xl,$Xl,$xC2 - le?vperm $IN,$IN,$IN,$lemask - vxor $t1,$t1,$Xh - vxor $IN,$IN,$t1 - vxor $IN,$IN,$Xl - beq Loop # did $len-=16 borrow? - - vxor $Xl,$Xl,$t1 - le?vperm $Xl,$Xl,$Xl,$lemask - stvx_u $Xl,0,$Xip # write out Xi - - mtspr 256,$vrsave - blr - .long 0 - .byte 0,12,0x14,0,0,0,4,0 - .long 0 -.size .gcm_ghash_p8,.-.gcm_ghash_p8 - -.asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>" -.align 2 -___ - -foreach (split("\n",$code)) { - if ($flavour =~ /le$/o) { # little-endian - s/le\?//o or - s/be\?/#be#/o; - } else { - s/le\?/#le#/o or - s/be\?//o; - } - print $_,"\n"; -} - -close STDOUT; # enforce flush diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl deleted file mode 100644 index b583898c11..0000000000 --- a/drivers/crypto/vmx/ppc-xlate.pl +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env perl -# SPDX-License-Identifier: GPL-2.0 - -# PowerPC assembler distiller by <appro>. 
- -my $flavour = shift; -my $output = shift; -open STDOUT,">$output" || die "can't open $output: $!"; - -my %GLOBALS; -my $dotinlocallabels=($flavour=~/linux/)?1:0; -my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0; -my $dotfunctions=($elfv2abi=~1)?0:1; - -################################################################ -# directives which need special treatment on different platforms -################################################################ -my $globl = sub { - my $junk = shift; - my $name = shift; - my $global = \$GLOBALS{$name}; - my $ret; - - $name =~ s|^[\.\_]||; - - SWITCH: for ($flavour) { - /aix/ && do { $name = ".$name"; - last; - }; - /osx/ && do { $name = "_$name"; - last; - }; - /linux/ - && do { $ret = "_GLOBAL($name)"; - last; - }; - } - - $ret = ".globl $name\nalign 5\n$name:" if (!$ret); - $$global = $name; - $ret; -}; -my $text = sub { - my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text"; - $ret = ".abiversion 2\n".$ret if ($elfv2abi); - $ret; -}; -my $machine = sub { - my $junk = shift; - my $arch = shift; - if ($flavour =~ /osx/) - { $arch =~ s/\"//g; - $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); - } - ".machine $arch"; -}; -my $size = sub { - if ($flavour =~ /linux/) - { shift; - my $name = shift; $name =~ s|^[\.\_]||; - my $ret = ".size $name,.-".($dotfunctions?".":"").$name; - $ret .= "\n.size .$name,.-.$name" if ($dotfunctions); - $ret; - } - else - { ""; } -}; -my $asciz = sub { - shift; - my $line = join(",",@_); - if ($line =~ /^"(.*)"$/) - { ".byte " . join(",",unpack("C*",$1),0) . 
"\n.align 2"; } - else - { ""; } -}; -my $quad = sub { - shift; - my @ret; - my ($hi,$lo); - for (@_) { - if (/^0x([0-9a-f]*?)([0-9a-f]{1,8})$/io) - { $hi=$1?"0x$1":"0"; $lo="0x$2"; } - elsif (/^([0-9]+)$/o) - { $hi=$1>>32; $lo=$1&0xffffffff; } # error-prone with 32-bit perl - else - { $hi=undef; $lo=$_; } - - if (defined($hi)) - { push(@ret,$flavour=~/le$/o?".long\t$lo,$hi":".long\t$hi,$lo"); } - else - { push(@ret,".quad $lo"); } - } - join("\n",@ret); -}; - -################################################################ -# simplified mnemonics not handled by at least one assembler -################################################################ -my $cmplw = sub { - my $f = shift; - my $cr = 0; $cr = shift if ($#_>1); - # Some out-of-date 32-bit GNU assembler just can't handle cmplw... - ($flavour =~ /linux.*32/) ? - " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : - " cmplw ".join(',',$cr,@_); -}; -my $bdnz = sub { - my $f = shift; - my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint - " bc $bo,0,".shift; -} if ($flavour!~/linux/); -my $bltlr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : - " bclr $bo,0"; -}; -my $bnelr = sub { - my $f = shift; - my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -my $beqlr = sub { - my $f = shift; - my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint - ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints - " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : - " bclr $bo,2"; -}; -# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two -# arguments is 64, with "operand out of range" error. 
-my $extrdi = sub { - my ($f,$ra,$rs,$n,$b) = @_; - $b = ($b+$n)&63; $n = 64-$n; - " rldicl $ra,$rs,$b,$n"; -}; -my $vmr = sub { - my ($f,$vx,$vy) = @_; - " vor $vx,$vy,$vy"; -}; - -# Some ABIs specify vrsave, special-purpose register #256, as reserved -# for system use. -my $no_vrsave = ($elfv2abi); -my $mtspr = sub { - my ($f,$idx,$ra) = @_; - if ($idx == 256 && $no_vrsave) { - " or $ra,$ra,$ra"; - } else { - " mtspr $idx,$ra"; - } -}; -my $mfspr = sub { - my ($f,$rd,$idx) = @_; - if ($idx == 256 && $no_vrsave) { - " li $rd,-1"; - } else { - " mfspr $rd,$idx"; - } -}; - -# PowerISA 2.06 stuff -sub vsxmem_op { - my ($f, $vrt, $ra, $rb, $op) = @_; - " .long ".sprintf "0x%X",(31<<26)|($vrt<<21)|($ra<<16)|($rb<<11)|($op*2+1); -} -# made-up unaligned memory reference AltiVec/VMX instructions -my $lvx_u = sub { vsxmem_op(@_, 844); }; # lxvd2x -my $stvx_u = sub { vsxmem_op(@_, 972); }; # stxvd2x -my $lvdx_u = sub { vsxmem_op(@_, 588); }; # lxsdx -my $stvdx_u = sub { vsxmem_op(@_, 716); }; # stxsdx -my $lvx_4w = sub { vsxmem_op(@_, 780); }; # lxvw4x -my $stvx_4w = sub { vsxmem_op(@_, 908); }; # stxvw4x - -# PowerISA 2.07 stuff -sub vcrypto_op { - my ($f, $vrt, $vra, $vrb, $op) = @_; - " .long ".sprintf "0x%X",(4<<26)|($vrt<<21)|($vra<<16)|($vrb<<11)|$op; -} -my $vcipher = sub { vcrypto_op(@_, 1288); }; -my $vcipherlast = sub { vcrypto_op(@_, 1289); }; -my $vncipher = sub { vcrypto_op(@_, 1352); }; -my $vncipherlast= sub { vcrypto_op(@_, 1353); }; -my $vsbox = sub { vcrypto_op(@_, 0, 1480); }; -my $vshasigmad = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1730); }; -my $vshasigmaw = sub { my ($st,$six)=splice(@_,-2); vcrypto_op(@_, $st<<4|$six, 1666); }; -my $vpmsumb = sub { vcrypto_op(@_, 1032); }; -my $vpmsumd = sub { vcrypto_op(@_, 1224); }; -my $vpmsubh = sub { vcrypto_op(@_, 1096); }; -my $vpmsumw = sub { vcrypto_op(@_, 1160); }; -my $vaddudm = sub { vcrypto_op(@_, 192); }; -my $vadduqm = sub { vcrypto_op(@_, 256); }; - -my $mtsle = sub { - my ($f, 
$arg) = @_; - " .long ".sprintf "0x%X",(31<<26)|($arg<<21)|(147*2); -}; - -print "#include <asm/ppc_asm.h>\n" if $flavour =~ /linux/; - -while($line=<>) { - - $line =~ s|[#!;].*$||; # get rid of asm-style comments... - $line =~ s|/\*.*\*/||; # ... and C-style comments... - $line =~ s|^\s+||; # ... and skip white spaces in beginning... - $line =~ s|\s+$||; # ... and at the end - - { - $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel - $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); - } - - { - $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; - my $c = $1; $c = "\t" if ($c eq ""); - my $mnemonic = $2; - my $f = $3; - my $opcode = eval("\$$mnemonic"); - $line =~ s/\b(c?[rf]|v|vs)([0-9]+)\b/$2/g if ($c ne "." and $flavour !~ /osx/); - if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } - elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } - } - - print $line if ($line); - print "\n"; -} - -close STDOUT; diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c deleted file mode 100644 index 7eb713cc87..0000000000 --- a/drivers/crypto/vmx/vmx.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Routines supporting VMX instructions on the Power 8 - * - * Copyright (C) 2015 International Business Machines Inc. 
- * - * Author: Marcelo Henrique Cerri <mhcerri@br.ibm.com> - */ - -#include <linux/module.h> -#include <linux/moduleparam.h> -#include <linux/types.h> -#include <linux/err.h> -#include <linux/cpufeature.h> -#include <linux/crypto.h> -#include <asm/cputable.h> -#include <crypto/internal/hash.h> -#include <crypto/internal/skcipher.h> - -#include "aesp8-ppc.h" - -static int __init p8_init(void) -{ - int ret; - - ret = crypto_register_shash(&p8_ghash_alg); - if (ret) - goto err; - - ret = crypto_register_alg(&p8_aes_alg); - if (ret) - goto err_unregister_ghash; - - ret = crypto_register_skcipher(&p8_aes_cbc_alg); - if (ret) - goto err_unregister_aes; - - ret = crypto_register_skcipher(&p8_aes_ctr_alg); - if (ret) - goto err_unregister_aes_cbc; - - ret = crypto_register_skcipher(&p8_aes_xts_alg); - if (ret) - goto err_unregister_aes_ctr; - - return 0; - -err_unregister_aes_ctr: - crypto_unregister_skcipher(&p8_aes_ctr_alg); -err_unregister_aes_cbc: - crypto_unregister_skcipher(&p8_aes_cbc_alg); -err_unregister_aes: - crypto_unregister_alg(&p8_aes_alg); -err_unregister_ghash: - crypto_unregister_shash(&p8_ghash_alg); -err: - return ret; -} - -static void __exit p8_exit(void) -{ - crypto_unregister_skcipher(&p8_aes_xts_alg); - crypto_unregister_skcipher(&p8_aes_ctr_alg); - crypto_unregister_skcipher(&p8_aes_cbc_alg); - crypto_unregister_alg(&p8_aes_alg); - crypto_unregister_shash(&p8_ghash_alg); -} - -module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, p8_init); -module_exit(p8_exit); - -MODULE_AUTHOR("Marcelo Cerri<mhcerri@br.ibm.com>"); -MODULE_DESCRIPTION("IBM VMX cryptographic acceleration instructions " - "support on Power 8"); -MODULE_LICENSE("GPL"); -MODULE_VERSION("1.0.0"); -MODULE_IMPORT_NS(CRYPTO_INTERNAL); |