diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 10:05:51 +0000 |
commit | 5d1646d90e1f2cceb9f0828f4b28318cd0ec7744 (patch) | |
tree | a94efe259b9009378be6d90eb30d2b019d95c194 /drivers/crypto/ccp | |
parent | Initial commit. (diff) | |
download | linux-upstream/5.10.209.tar.xz linux-upstream/5.10.209.zip |
Adding upstream version 5.10.209.upstream/5.10.209upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/crypto/ccp')
28 files changed, 13034 insertions, 0 deletions
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig new file mode 100644 index 000000000..32268e239 --- /dev/null +++ b/drivers/crypto/ccp/Kconfig @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0-only +config CRYPTO_DEV_CCP_DD + tristate "Secure Processor device driver" + depends on CPU_SUP_AMD || ARM64 + default m + help + Provides AMD Secure Processor device driver. + If you choose 'M' here, this module will be called ccp. + +config CRYPTO_DEV_SP_CCP + bool "Cryptographic Coprocessor device" + default y + depends on CRYPTO_DEV_CCP_DD && DMADEVICES + select HW_RANDOM + select DMA_ENGINE + select CRYPTO_SHA1 + select CRYPTO_SHA256 + help + Provides the support for AMD Cryptographic Coprocessor (CCP) device + which can be used to offload encryption operations such as SHA, AES + and more. + +config CRYPTO_DEV_CCP_CRYPTO + tristate "Encryption and hashing offload support" + default m + depends on CRYPTO_DEV_CCP_DD + depends on CRYPTO_DEV_SP_CCP + select CRYPTO_HASH + select CRYPTO_SKCIPHER + select CRYPTO_AUTHENC + select CRYPTO_RSA + select CRYPTO_LIB_AES + help + Support for using the cryptographic API with the AMD Cryptographic + Coprocessor. This module supports offload of SHA and AES algorithms. + If you choose 'M' here, this module will be called ccp_crypto. + +config CRYPTO_DEV_SP_PSP + bool "Platform Security Processor (PSP) device" + default y + depends on CRYPTO_DEV_CCP_DD && X86_64 + help + Provide support for the AMD Platform Security Processor (PSP). + The PSP is a dedicated processor that provides support for key + management commands in Secure Encrypted Virtualization (SEV) mode, + along with software-based Trusted Execution Environment (TEE) to + enable third-party trusted applications. + +config CRYPTO_DEV_CCP_DEBUGFS + bool "Enable CCP Internals in DebugFS" + default n + depends on CRYPTO_DEV_SP_CCP + help + Expose CCP device information such as operation statistics, feature + information, and descriptor queue contents. diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile new file mode 100644 index 000000000..db362fe47 --- /dev/null +++ b/drivers/crypto/ccp/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o +ccp-objs := sp-dev.o sp-platform.o +ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \ + ccp-ops.o \ + ccp-dev-v3.o \ + ccp-dev-v5.o \ + ccp-dmaengine.o +ccp-$(CONFIG_CRYPTO_DEV_CCP_DEBUGFS) += ccp-debugfs.o +ccp-$(CONFIG_PCI) += sp-pci.o +ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o \ + sev-dev.o \ + tee-dev.o + +obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o +ccp-crypto-objs := ccp-crypto-main.o \ + ccp-crypto-aes.o \ + ccp-crypto-aes-cmac.o \ + ccp-crypto-aes-xts.o \ + ccp-crypto-aes-galois.o \ + ccp-crypto-des3.o \ + ccp-crypto-rsa.o \ + ccp-crypto-sha.o diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c new file mode 100644 index 000000000..11a305fa1 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) AES CMAC crypto API support + * + * Copyright (C) 2013,2018 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/hash.h> +#include <crypto/internal/hash.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, + int ret) +{ + struct ahash_request *req = ahash_request_cast(async_req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + unsigned int digest_size = crypto_ahash_digestsize(tfm); + + if (ret) + goto e_free; + + if (rctx->hash_rem) { + /* Save remaining data to buffer */ + unsigned int offset = rctx->nbytes - rctx->hash_rem; + + scatterwalk_map_and_copy(rctx->buf, rctx->src, + offset, rctx->hash_rem, 0); + rctx->buf_count = rctx->hash_rem; + } else { + rctx->buf_count = 0; + } + + /* Update result area if supplied */ + if (req->result && rctx->final) + memcpy(req->result, rctx->iv, digest_size); + +e_free: + sg_free_table(&rctx->data_sg); + + return ret; +} + +static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes, + unsigned int final) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct scatterlist *sg, *cmac_key_sg = NULL; + unsigned int block_size = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + unsigned int need_pad, sg_count; + gfp_t gfp; + u64 len; + int ret; + + if (!ctx->u.aes.key_len) + return -EINVAL; + + if (nbytes) + rctx->null_msg = 0; + + len = (u64)rctx->buf_count + (u64)nbytes; + + if (!final && (len <= block_size)) { + scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src, + 0, nbytes, 0); + rctx->buf_count += nbytes; + + return 0; + } + + rctx->src = req->src; + rctx->nbytes = nbytes; + + rctx->final = final; + rctx->hash_rem = final ? 0 : len & (block_size - 1); + rctx->hash_cnt = len - rctx->hash_rem; + if (!final && !rctx->hash_rem) { + /* CCP can't do zero length final, so keep some data around */ + rctx->hash_cnt -= block_size; + rctx->hash_rem = block_size; + } + + if (final && (rctx->null_msg || (len & (block_size - 1)))) + need_pad = 1; + else + need_pad = 0; + + sg_init_one(&rctx->iv_sg, rctx->iv, sizeof(rctx->iv)); + + /* Build the data scatterlist table - allocate enough entries for all + * possible data pieces (buffer, input data, padding) + */ + sg_count = (nbytes) ? sg_nents(req->src) + 2 : 2; + gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? + GFP_KERNEL : GFP_ATOMIC; + ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp); + if (ret) + return ret; + + sg = NULL; + if (rctx->buf_count) { + sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); + sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg); + if (!sg) { + ret = -EINVAL; + goto e_free; + } + } + + if (nbytes) { + sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src); + if (!sg) { + ret = -EINVAL; + goto e_free; + } + } + + if (need_pad) { + int pad_length = block_size - (len & (block_size - 1)); + + rctx->hash_cnt += pad_length; + + memset(rctx->pad, 0, sizeof(rctx->pad)); + rctx->pad[0] = 0x80; + sg_init_one(&rctx->pad_sg, rctx->pad, pad_length); + sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->pad_sg); + if (!sg) { + ret = -EINVAL; + goto e_free; + } + } + if (sg) { + sg_mark_end(sg); + sg = rctx->data_sg.sgl; + } + + /* Initialize the K1/K2 scatterlist */ + if (final) + cmac_key_sg = (need_pad) ? &ctx->u.aes.k2_sg + : &ctx->u.aes.k1_sg; + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_AES; + rctx->cmd.u.aes.type = ctx->u.aes.type; + rctx->cmd.u.aes.mode = ctx->u.aes.mode; + rctx->cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT; + rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; + rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; + rctx->cmd.u.aes.iv = &rctx->iv_sg; + rctx->cmd.u.aes.iv_len = AES_BLOCK_SIZE; + rctx->cmd.u.aes.src = sg; + rctx->cmd.u.aes.src_len = rctx->hash_cnt; + rctx->cmd.u.aes.dst = NULL; + rctx->cmd.u.aes.cmac_key = cmac_key_sg; + rctx->cmd.u.aes.cmac_key_len = ctx->u.aes.kn_len; + rctx->cmd.u.aes.cmac_final = final; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; + +e_free: + sg_free_table(&rctx->data_sg); + + return ret; +} + +static int ccp_aes_cmac_init(struct ahash_request *req) +{ + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + + memset(rctx, 0, sizeof(*rctx)); + + rctx->null_msg = 1; + + return 0; +} + +static int ccp_aes_cmac_update(struct ahash_request *req) +{ + return ccp_do_cmac_update(req, req->nbytes, 0); +} + +static int ccp_aes_cmac_final(struct ahash_request *req) +{ + return ccp_do_cmac_update(req, 0, 1); +} + +static int ccp_aes_cmac_finup(struct ahash_request *req) +{ + return ccp_do_cmac_update(req, req->nbytes, 1); +} + +static int ccp_aes_cmac_digest(struct ahash_request *req) +{ + int ret; + + ret = ccp_aes_cmac_init(req); + if (ret) + return ret; + + return ccp_aes_cmac_finup(req); +} + +static int ccp_aes_cmac_export(struct ahash_request *req, void *out) +{ + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_exp_ctx state; + + /* Don't let anything leak to 'out' */ + memset(&state, 0, sizeof(state)); + + state.null_msg = rctx->null_msg; + memcpy(state.iv, rctx->iv, sizeof(state.iv)); + state.buf_count = rctx->buf_count; + memcpy(state.buf, rctx->buf, sizeof(state.buf)); + + /* 'out' may not be aligned so memcpy from local variable */ + memcpy(out, &state, sizeof(state)); + + return 0; +} + +static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) +{ + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_exp_ctx state; + + /* 'in' may not be aligned so memcpy to local variable */ + memcpy(&state, in, sizeof(state)); + + memset(rctx, 0, sizeof(*rctx)); + rctx->null_msg = state.null_msg; + memcpy(rctx->iv, state.iv, sizeof(rctx->iv)); + rctx->buf_count = state.buf_count; + memcpy(rctx->buf, state.buf, sizeof(rctx->buf)); + + return 0; +} + +static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct ccp_crypto_ahash_alg *alg = + ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); + u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo; + u64 rb_hi = 0x00, rb_lo = 0x87; + struct crypto_aes_ctx aes; + __be64 *gk; + int ret; + + switch (key_len) { + case AES_KEYSIZE_128: + ctx->u.aes.type = CCP_AES_TYPE_128; + break; + case AES_KEYSIZE_192: + ctx->u.aes.type = CCP_AES_TYPE_192; + break; + case AES_KEYSIZE_256: + ctx->u.aes.type = CCP_AES_TYPE_256; + break; + default: + return -EINVAL; + } + ctx->u.aes.mode = alg->mode; + + /* Set to zero until complete */ + ctx->u.aes.key_len = 0; + + /* Set the key for the AES cipher used to generate the keys */ + ret = aes_expandkey(&aes, key, key_len); + if (ret) + return ret; + + /* Encrypt a block of zeroes - use key area in context */ + memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key)); + aes_encrypt(&aes, ctx->u.aes.key, ctx->u.aes.key); + memzero_explicit(&aes, sizeof(aes)); + + /* Generate K1 and K2 */ + k0_hi = be64_to_cpu(*((__be64 *)ctx->u.aes.key)); + k0_lo = be64_to_cpu(*((__be64 *)ctx->u.aes.key + 1)); + + k1_hi = (k0_hi << 1) | (k0_lo >> 63); + k1_lo = k0_lo << 1; + if (ctx->u.aes.key[0] & 0x80) { + k1_hi ^= rb_hi; + k1_lo ^= rb_lo; + } + gk = (__be64 *)ctx->u.aes.k1; + *gk = cpu_to_be64(k1_hi); + gk++; + *gk = cpu_to_be64(k1_lo); + + k2_hi = (k1_hi << 1) | (k1_lo >> 63); + k2_lo = k1_lo << 1; + if (ctx->u.aes.k1[0] & 0x80) { + k2_hi ^= rb_hi; + k2_lo ^= rb_lo; + } + gk = (__be64 *)ctx->u.aes.k2; + *gk = cpu_to_be64(k2_hi); + gk++; + *gk = cpu_to_be64(k2_lo); + + ctx->u.aes.kn_len = sizeof(ctx->u.aes.k1); + sg_init_one(&ctx->u.aes.k1_sg, ctx->u.aes.k1, sizeof(ctx->u.aes.k1)); + sg_init_one(&ctx->u.aes.k2_sg, ctx->u.aes.k2, sizeof(ctx->u.aes.k2)); + + /* Save the supplied key */ + memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key)); + memcpy(ctx->u.aes.key, key, key_len); + ctx->u.aes.key_len = key_len; + sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + + return ret; +} + +static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + + ctx->complete = ccp_aes_cmac_complete; + ctx->u.aes.key_len = 0; + + crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); + + return 0; +} + +int ccp_register_aes_cmac_algs(struct list_head *head) +{ + struct ccp_crypto_ahash_alg *ccp_alg; + struct ahash_alg *alg; + struct hash_alg_common *halg; + struct crypto_alg *base; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + ccp_alg->mode = CCP_AES_MODE_CMAC; + + alg = &ccp_alg->alg; + alg->init = ccp_aes_cmac_init; + alg->update = ccp_aes_cmac_update; + alg->final = ccp_aes_cmac_final; + alg->finup = ccp_aes_cmac_finup; + alg->digest = ccp_aes_cmac_digest; + alg->export = ccp_aes_cmac_export; + alg->import = ccp_aes_cmac_import; + alg->setkey = ccp_aes_cmac_setkey; + + halg = &alg->halg; + halg->digestsize = AES_BLOCK_SIZE; + halg->statesize = sizeof(struct ccp_aes_cmac_exp_ctx); + + base = &halg->base; + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)"); + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "cmac-aes-ccp"); + base->cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK; + base->cra_blocksize = AES_BLOCK_SIZE; + base->cra_ctxsize = sizeof(struct ccp_ctx); + base->cra_priority = CCP_CRA_PRIORITY; + base->cra_init = ccp_aes_cmac_cra_init; + base->cra_module = THIS_MODULE; + + ret = crypto_register_ahash(alg); + if (ret) { + pr_err("%s ahash algorithm registration error (%d)\n", + base->cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c new file mode 100644 index 000000000..1c1c939f5 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) AES GCM crypto API support + * + * Copyright (C) 2016,2017 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/internal/aead.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/ctr.h> +#include <crypto/gcm.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +static int ccp_aes_gcm_complete(struct crypto_async_request *async_req, int ret) +{ + return ret; +} + +static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_aead_ctx(tfm); + + switch (key_len) { + case AES_KEYSIZE_128: + ctx->u.aes.type = CCP_AES_TYPE_128; + break; + case AES_KEYSIZE_192: + ctx->u.aes.type = CCP_AES_TYPE_192; + break; + case AES_KEYSIZE_256: + ctx->u.aes.type = CCP_AES_TYPE_256; + break; + default: + return -EINVAL; + } + + ctx->u.aes.mode = CCP_AES_MODE_GCM; + ctx->u.aes.key_len = key_len; + + memcpy(ctx->u.aes.key, key, key_len); + sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + + return 0; +} + +static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + switch (authsize) { + case 16: + case 15: + case 14: + case 13: + case 12: + case 8: + case 4: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct ccp_ctx *ctx = crypto_aead_ctx(tfm); + struct ccp_aes_req_ctx *rctx = aead_request_ctx(req); + struct scatterlist *iv_sg = NULL; + unsigned int iv_len = 0; + int i; + int ret = 0; + + if (!ctx->u.aes.key_len) + return -EINVAL; + + if (ctx->u.aes.mode != CCP_AES_MODE_GCM) + return -EINVAL; + + if (!req->iv) + return -EINVAL; + + /* + * 5 parts: + * plaintext/ciphertext input + * AAD + * key + * IV + * Destination+tag buffer + */ + + /* Prepare the IV: 12 bytes + an integer (counter) */ + memcpy(rctx->iv, req->iv, GCM_AES_IV_SIZE); + for (i = 0; i < 3; i++) + rctx->iv[i + GCM_AES_IV_SIZE] = 0; + rctx->iv[AES_BLOCK_SIZE - 1] = 1; + + /* Set up a scatterlist for the IV */ + iv_sg = &rctx->iv_sg; + iv_len = AES_BLOCK_SIZE; + sg_init_one(iv_sg, rctx->iv, iv_len); + + /* The AAD + plaintext are concatenated in the src buffer */ + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_AES; + rctx->cmd.u.aes.authsize = crypto_aead_authsize(tfm); + rctx->cmd.u.aes.type = ctx->u.aes.type; + rctx->cmd.u.aes.mode = ctx->u.aes.mode; + rctx->cmd.u.aes.action = encrypt; + rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; + rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; + rctx->cmd.u.aes.iv = iv_sg; + rctx->cmd.u.aes.iv_len = iv_len; + rctx->cmd.u.aes.src = req->src; + rctx->cmd.u.aes.src_len = req->cryptlen; + rctx->cmd.u.aes.aad_len = req->assoclen; + + /* The cipher text + the tag are in the dst buffer */ + rctx->cmd.u.aes.dst = req->dst; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_aes_gcm_encrypt(struct aead_request *req) +{ + return ccp_aes_gcm_crypt(req, CCP_AES_ACTION_ENCRYPT); +} + +static int ccp_aes_gcm_decrypt(struct aead_request *req) +{ + return ccp_aes_gcm_crypt(req, CCP_AES_ACTION_DECRYPT); +} + +static int ccp_aes_gcm_cra_init(struct crypto_aead *tfm) +{ + struct ccp_ctx *ctx = crypto_aead_ctx(tfm); + + ctx->complete = ccp_aes_gcm_complete; + ctx->u.aes.key_len = 0; + + crypto_aead_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx)); + + return 0; +} + +static void ccp_aes_gcm_cra_exit(struct crypto_tfm *tfm) +{ +} + +static struct aead_alg ccp_aes_gcm_defaults = { + .setkey = ccp_aes_gcm_setkey, + .setauthsize = ccp_aes_gcm_setauthsize, + .encrypt = ccp_aes_gcm_encrypt, + .decrypt = ccp_aes_gcm_decrypt, + .init = ccp_aes_gcm_cra_init, + .ivsize = GCM_AES_IV_SIZE, + .maxauthsize = AES_BLOCK_SIZE, + .base = { + .cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct ccp_ctx), + .cra_priority = CCP_CRA_PRIORITY, + .cra_exit = ccp_aes_gcm_cra_exit, + .cra_module = THIS_MODULE, + }, +}; + +struct ccp_aes_aead_def { + enum ccp_aes_mode mode; + unsigned int version; + const char *name; + const char *driver_name; + unsigned int blocksize; + unsigned int ivsize; + struct aead_alg *alg_defaults; +}; + +static struct ccp_aes_aead_def aes_aead_algs[] = { + { + .mode = CCP_AES_MODE_GHASH, + .version = CCP_VERSION(5, 0), + .name = "gcm(aes)", + .driver_name = "gcm-aes-ccp", + .blocksize = 1, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_gcm_defaults, + }, +}; + +static int ccp_register_aes_aead(struct list_head *head, + const struct ccp_aes_aead_def *def) +{ + struct ccp_crypto_aead *ccp_aead; + struct aead_alg *alg; + int ret; + + ccp_aead = kzalloc(sizeof(*ccp_aead), GFP_KERNEL); + if (!ccp_aead) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_aead->entry); + + ccp_aead->mode = def->mode; + + /* Copy the defaults and override as necessary */ + alg = &ccp_aead->alg; + *alg = *def->alg_defaults; + snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->driver_name); + alg->base.cra_blocksize = def->blocksize; + + ret = crypto_register_aead(alg); + if (ret) { + pr_err("%s aead algorithm registration error (%d)\n", + alg->base.cra_name, ret); + kfree(ccp_aead); + return ret; + } + + list_add(&ccp_aead->entry, head); + + return 0; +} + +int ccp_register_aes_aeads(struct list_head *head) +{ + int i, ret; + unsigned int ccpversion = ccp_version(); + + for (i = 0; i < ARRAY_SIZE(aes_aead_algs); i++) { + if (aes_aead_algs[i].version > ccpversion) + continue; + ret = ccp_register_aes_aead(head, &aes_aead_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c new file mode 100644 index 000000000..6849261ca --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support + * + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <crypto/aes.h> +#include <crypto/xts.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +struct ccp_aes_xts_def { + const char *name; + const char *drv_name; +}; + +static const struct ccp_aes_xts_def aes_xts_algs[] = { + { + .name = "xts(aes)", + .drv_name = "xts-aes-ccp", + }, +}; + +struct ccp_unit_size_map { + unsigned int size; + u32 value; +}; + +static struct ccp_unit_size_map xts_unit_sizes[] = { + { + .size = 16, + .value = CCP_XTS_AES_UNIT_SIZE_16, + }, + { + .size = 512, + .value = CCP_XTS_AES_UNIT_SIZE_512, + }, + { + .size = 1024, + .value = CCP_XTS_AES_UNIT_SIZE_1024, + }, + { + .size = 2048, + .value = CCP_XTS_AES_UNIT_SIZE_2048, + }, + { + .size = 4096, + .value = CCP_XTS_AES_UNIT_SIZE_4096, + }, +}; + +static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) +{ + struct skcipher_request *req = skcipher_request_cast(async_req); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + + if (ret) + return ret; + + memcpy(req->iv, rctx->iv, AES_BLOCK_SIZE); + + return 0; +} + +static int ccp_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + unsigned int ccpversion = ccp_version(); + int ret; + + ret = xts_verify_key(tfm, key, key_len); + if (ret) + return ret; + + /* Version 3 devices support 128-bit keys; version 5 devices can + * accommodate 128- and 256-bit keys. + */ + switch (key_len) { + case AES_KEYSIZE_128 * 2: + memcpy(ctx->u.aes.key, key, key_len); + break; + case AES_KEYSIZE_256 * 2: + if (ccpversion > CCP_VERSION(3, 0)) + memcpy(ctx->u.aes.key, key, key_len); + break; + } + ctx->u.aes.key_len = key_len / 2; + sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + + return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len); +} + +static int ccp_aes_xts_crypt(struct skcipher_request *req, + unsigned int encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + unsigned int ccpversion = ccp_version(); + unsigned int fallback = 0; + unsigned int unit; + u32 unit_size; + int ret; + + if (!ctx->u.aes.key_len) + return -EINVAL; + + if (!req->iv) + return -EINVAL; + + /* Check conditions under which the CCP can fulfill a request. The + * device can handle input plaintext of a length that is a multiple + * of the unit_size, bug the crypto implementation only supports + * the unit_size being equal to the input length. This limits the + * number of scenarios we can handle. + */ + unit_size = CCP_XTS_AES_UNIT_SIZE__LAST; + for (unit = 0; unit < ARRAY_SIZE(xts_unit_sizes); unit++) { + if (req->cryptlen == xts_unit_sizes[unit].size) { + unit_size = unit; + break; + } + } + /* The CCP has restrictions on block sizes. Also, a version 3 device + * only supports AES-128 operations; version 5 CCPs support both + * AES-128 and -256 operations. + */ + if (unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) + fallback = 1; + if ((ccpversion < CCP_VERSION(5, 0)) && + (ctx->u.aes.key_len != AES_KEYSIZE_128)) + fallback = 1; + if ((ctx->u.aes.key_len != AES_KEYSIZE_128) && + (ctx->u.aes.key_len != AES_KEYSIZE_256)) + fallback = 1; + if (fallback) { + /* Use the fallback to process the request for any + * unsupported unit sizes or key sizes + */ + skcipher_request_set_tfm(&rctx->fallback_req, + ctx->u.aes.tfm_skcipher); + skcipher_request_set_callback(&rctx->fallback_req, + req->base.flags, + req->base.complete, + req->base.data); + skcipher_request_set_crypt(&rctx->fallback_req, req->src, + req->dst, req->cryptlen, req->iv); + ret = encrypt ? crypto_skcipher_encrypt(&rctx->fallback_req) : + crypto_skcipher_decrypt(&rctx->fallback_req); + return ret; + } + + memcpy(rctx->iv, req->iv, AES_BLOCK_SIZE); + sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE); + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_XTS_AES_128; + rctx->cmd.u.xts.type = CCP_AES_TYPE_128; + rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT + : CCP_AES_ACTION_DECRYPT; + rctx->cmd.u.xts.unit_size = unit_size; + rctx->cmd.u.xts.key = &ctx->u.aes.key_sg; + rctx->cmd.u.xts.key_len = ctx->u.aes.key_len; + rctx->cmd.u.xts.iv = &rctx->iv_sg; + rctx->cmd.u.xts.iv_len = AES_BLOCK_SIZE; + rctx->cmd.u.xts.src = req->src; + rctx->cmd.u.xts.src_len = req->cryptlen; + rctx->cmd.u.xts.dst = req->dst; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_aes_xts_encrypt(struct skcipher_request *req) +{ + return ccp_aes_xts_crypt(req, 1); +} + +static int ccp_aes_xts_decrypt(struct skcipher_request *req) +{ + return ccp_aes_xts_crypt(req, 0); +} + +static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm) +{ + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct crypto_skcipher *fallback_tfm; + + ctx->complete = ccp_aes_xts_complete; + ctx->u.aes.key_len = 0; + + fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0, + CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback_tfm)) { + pr_warn("could not load fallback driver xts(aes)\n"); + return PTR_ERR(fallback_tfm); + } + ctx->u.aes.tfm_skcipher = fallback_tfm; + + crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx) + + crypto_skcipher_reqsize(fallback_tfm)); + + return 0; +} + +static void ccp_aes_xts_exit_tfm(struct crypto_skcipher *tfm) +{ + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + + crypto_free_skcipher(ctx->u.aes.tfm_skcipher); +} + +static int ccp_register_aes_xts_alg(struct list_head *head, + const struct ccp_aes_xts_def *def) +{ + struct ccp_crypto_skcipher_alg *ccp_alg; + struct skcipher_alg *alg; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + alg = &ccp_alg->alg; + + snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->drv_name); + alg->base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK; + alg->base.cra_blocksize = AES_BLOCK_SIZE; + alg->base.cra_ctxsize = sizeof(struct ccp_ctx); + alg->base.cra_priority = CCP_CRA_PRIORITY; + alg->base.cra_module = THIS_MODULE; + + alg->setkey = ccp_aes_xts_setkey; + alg->encrypt = ccp_aes_xts_encrypt; + alg->decrypt = ccp_aes_xts_decrypt; + alg->min_keysize = AES_MIN_KEY_SIZE * 2; + alg->max_keysize = AES_MAX_KEY_SIZE * 2; + alg->ivsize = AES_BLOCK_SIZE; + alg->init = ccp_aes_xts_init_tfm; + alg->exit = ccp_aes_xts_exit_tfm; + + ret = crypto_register_skcipher(alg); + if (ret) { + pr_err("%s skcipher algorithm registration error (%d)\n", + alg->base.cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} + +int ccp_register_aes_xts_algs(struct list_head *head) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(aes_xts_algs); i++) { + ret = ccp_register_aes_xts_alg(head, &aes_xts_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c new file mode 100644 index 000000000..e6dcd8ced --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -0,0 +1,360 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) AES crypto API support + * + * Copyright (C) 2013-2019 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/ctr.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) +{ + struct skcipher_request *req = skcipher_request_cast(async_req); + struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + + if (ret) + return ret; + + if (ctx->u.aes.mode != CCP_AES_MODE_ECB) + memcpy(req->iv, rctx->iv, AES_BLOCK_SIZE); + + return 0; +} + +static int ccp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + + switch (key_len) { + case AES_KEYSIZE_128: + ctx->u.aes.type = CCP_AES_TYPE_128; + break; + case AES_KEYSIZE_192: + ctx->u.aes.type = CCP_AES_TYPE_192; + break; + case AES_KEYSIZE_256: + ctx->u.aes.type = CCP_AES_TYPE_256; + break; + default: + return -EINVAL; + } + ctx->u.aes.mode = alg->mode; + ctx->u.aes.key_len = key_len; + + memcpy(ctx->u.aes.key, key, key_len); + sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); + + return 0; +} + +static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct scatterlist *iv_sg = NULL; + unsigned int iv_len = 0; + int ret; + + if (!ctx->u.aes.key_len) + return -EINVAL; + + if (((ctx->u.aes.mode == CCP_AES_MODE_ECB) || + (ctx->u.aes.mode == CCP_AES_MODE_CBC)) && + (req->cryptlen & (AES_BLOCK_SIZE - 1))) + return -EINVAL; + + if (ctx->u.aes.mode != CCP_AES_MODE_ECB) { + if (!req->iv) + return -EINVAL; + + memcpy(rctx->iv, req->iv, AES_BLOCK_SIZE); + iv_sg = &rctx->iv_sg; + iv_len = AES_BLOCK_SIZE; + sg_init_one(iv_sg, rctx->iv, iv_len); + } + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_AES; + rctx->cmd.u.aes.type = ctx->u.aes.type; + rctx->cmd.u.aes.mode = ctx->u.aes.mode; + rctx->cmd.u.aes.action = + (encrypt) ? CCP_AES_ACTION_ENCRYPT : CCP_AES_ACTION_DECRYPT; + rctx->cmd.u.aes.key = &ctx->u.aes.key_sg; + rctx->cmd.u.aes.key_len = ctx->u.aes.key_len; + rctx->cmd.u.aes.iv = iv_sg; + rctx->cmd.u.aes.iv_len = iv_len; + rctx->cmd.u.aes.src = req->src; + rctx->cmd.u.aes.src_len = req->cryptlen; + rctx->cmd.u.aes.dst = req->dst; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_aes_encrypt(struct skcipher_request *req) +{ + return ccp_aes_crypt(req, true); +} + +static int ccp_aes_decrypt(struct skcipher_request *req) +{ + return ccp_aes_crypt(req, false); +} + +static int ccp_aes_init_tfm(struct crypto_skcipher *tfm) +{ + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + + ctx->complete = ccp_aes_complete; + ctx->u.aes.key_len = 0; + + crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx)); + + return 0; +} + +static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req, + int ret) +{ + struct skcipher_request *req = skcipher_request_cast(async_req); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + + /* Restore the original pointer */ + req->iv = rctx->rfc3686_info; + + return ccp_aes_complete(async_req, ret); +} + +static int ccp_aes_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + + if (key_len < CTR_RFC3686_NONCE_SIZE) + return -EINVAL; + + key_len -= CTR_RFC3686_NONCE_SIZE; + memcpy(ctx->u.aes.nonce, key + key_len, CTR_RFC3686_NONCE_SIZE); + + return ccp_aes_setkey(tfm, key, key_len); +} + +static int ccp_aes_rfc3686_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + u8 *iv; + + /* Initialize the CTR block */ + iv = rctx->rfc3686_iv; + memcpy(iv, ctx->u.aes.nonce, CTR_RFC3686_NONCE_SIZE); + + iv += CTR_RFC3686_NONCE_SIZE; + memcpy(iv, req->iv, CTR_RFC3686_IV_SIZE); + + iv += CTR_RFC3686_IV_SIZE; + *(__be32 *)iv = cpu_to_be32(1); + + /* Point to the new IV */ + rctx->rfc3686_info = req->iv; + req->iv = rctx->rfc3686_iv; + + return ccp_aes_crypt(req, encrypt); +} + +static int ccp_aes_rfc3686_encrypt(struct skcipher_request *req) +{ + return ccp_aes_rfc3686_crypt(req, true); +} + +static int ccp_aes_rfc3686_decrypt(struct skcipher_request *req) +{ + return ccp_aes_rfc3686_crypt(req, false); +} + +static int ccp_aes_rfc3686_init_tfm(struct crypto_skcipher *tfm) +{ + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + + ctx->complete = ccp_aes_rfc3686_complete; + ctx->u.aes.key_len = 0; + + crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx)); + + return 0; +} + +static const struct skcipher_alg ccp_aes_defaults = { + .setkey = ccp_aes_setkey, + .encrypt = ccp_aes_encrypt, + .decrypt = ccp_aes_decrypt, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .init = ccp_aes_init_tfm, + + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = AES_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_priority = CCP_CRA_PRIORITY, + .base.cra_module = THIS_MODULE, +}; + +static const struct skcipher_alg ccp_aes_rfc3686_defaults = { + .setkey = ccp_aes_rfc3686_setkey, + .encrypt = ccp_aes_rfc3686_encrypt, + .decrypt = ccp_aes_rfc3686_decrypt, + .min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, + .max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE, + .init = ccp_aes_rfc3686_init_tfm, + + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = CTR_RFC3686_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_priority = CCP_CRA_PRIORITY, + .base.cra_module = THIS_MODULE, +}; + +struct ccp_aes_def { + enum ccp_aes_mode mode; + unsigned int version; + const char *name; + const char *driver_name; + unsigned int blocksize; + unsigned int ivsize; + const struct skcipher_alg *alg_defaults; +}; + +static struct ccp_aes_def aes_algs[] = { + { + .mode = CCP_AES_MODE_ECB, + .version = CCP_VERSION(3, 0), + .name = "ecb(aes)", + .driver_name = "ecb-aes-ccp", + .blocksize = AES_BLOCK_SIZE, + .ivsize = 0, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_CBC, + .version = CCP_VERSION(3, 0), + .name = "cbc(aes)", + .driver_name = "cbc-aes-ccp", + .blocksize = AES_BLOCK_SIZE, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_CFB, + .version = CCP_VERSION(3, 0), + .name = "cfb(aes)", + .driver_name = "cfb-aes-ccp", + .blocksize = 1, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_OFB, + .version = CCP_VERSION(3, 0), + .name = "ofb(aes)", + .driver_name = "ofb-aes-ccp", + .blocksize = 1, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_CTR, + .version = CCP_VERSION(3, 0), + .name = "ctr(aes)", + .driver_name = "ctr-aes-ccp", + .blocksize = 1, + .ivsize = AES_BLOCK_SIZE, + .alg_defaults = &ccp_aes_defaults, + }, + { + .mode = CCP_AES_MODE_CTR, + .version = CCP_VERSION(3, 0), + .name = "rfc3686(ctr(aes))", + .driver_name = "rfc3686-ctr-aes-ccp", + .blocksize = 1, + .ivsize = CTR_RFC3686_IV_SIZE, + .alg_defaults = &ccp_aes_rfc3686_defaults, + }, +}; + +static int ccp_register_aes_alg(struct list_head *head, + const struct ccp_aes_def *def) +{ + struct ccp_crypto_skcipher_alg *ccp_alg; + struct skcipher_alg *alg; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + ccp_alg->mode = def->mode; + + /* Copy the defaults and override as necessary */ + alg = &ccp_alg->alg; + *alg = *def->alg_defaults; + snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->driver_name); + alg->base.cra_blocksize = def->blocksize; + alg->ivsize = def->ivsize; + + ret = crypto_register_skcipher(alg); + if (ret) { + pr_err("%s skcipher algorithm registration error (%d)\n", + alg->base.cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} + +int ccp_register_aes_algs(struct list_head *head) +{ + int i, ret; + unsigned int ccpversion = ccp_version(); + + for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { + if (aes_algs[i].version > ccpversion) + continue; + ret = ccp_register_aes_alg(head, &aes_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c new file mode 100644 index 000000000..ec97daf0f --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-des3.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) DES3 crypto API support + * + * Copyright (C) 2016,2017 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <ghook@amd.com> + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/scatterwalk.h> +#include <crypto/internal/des.h> + +#include "ccp-crypto.h" + +static int ccp_des3_complete(struct crypto_async_request *async_req, int ret) +{ + struct skcipher_request *req = skcipher_request_cast(async_req); + struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req); + + if (ret) + return ret; + + if (ctx->u.des3.mode != CCP_DES3_MODE_ECB) + memcpy(req->iv, rctx->iv, DES3_EDE_BLOCK_SIZE); + + return 0; +} + +static int ccp_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + int err; + + err = verify_skcipher_des3_key(tfm, key); + if (err) + return err; + + /* It's not clear that there is any support for a keysize of 112. + * If needed, the caller should make K1 == K3 + */ + ctx->u.des3.type = CCP_DES3_TYPE_168; + ctx->u.des3.mode = alg->mode; + ctx->u.des3.key_len = key_len; + + memcpy(ctx->u.des3.key, key, key_len); + sg_init_one(&ctx->u.des3.key_sg, ctx->u.des3.key, key_len); + + return 0; +} + +static int ccp_des3_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req); + struct scatterlist *iv_sg = NULL; + unsigned int iv_len = 0; + int ret; + + if (!ctx->u.des3.key_len) + return -EINVAL; + + if (((ctx->u.des3.mode == CCP_DES3_MODE_ECB) || + (ctx->u.des3.mode == CCP_DES3_MODE_CBC)) && + (req->cryptlen & (DES3_EDE_BLOCK_SIZE - 1))) + return -EINVAL; + + if (ctx->u.des3.mode != CCP_DES3_MODE_ECB) { + if (!req->iv) + return -EINVAL; + + memcpy(rctx->iv, req->iv, DES3_EDE_BLOCK_SIZE); + iv_sg = &rctx->iv_sg; + iv_len = DES3_EDE_BLOCK_SIZE; + sg_init_one(iv_sg, rctx->iv, iv_len); + } + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_DES3; + rctx->cmd.u.des3.type = ctx->u.des3.type; + rctx->cmd.u.des3.mode = ctx->u.des3.mode; + rctx->cmd.u.des3.action = (encrypt) + ? CCP_DES3_ACTION_ENCRYPT + : CCP_DES3_ACTION_DECRYPT; + rctx->cmd.u.des3.key = &ctx->u.des3.key_sg; + rctx->cmd.u.des3.key_len = ctx->u.des3.key_len; + rctx->cmd.u.des3.iv = iv_sg; + rctx->cmd.u.des3.iv_len = iv_len; + rctx->cmd.u.des3.src = req->src; + rctx->cmd.u.des3.src_len = req->cryptlen; + rctx->cmd.u.des3.dst = req->dst; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_des3_encrypt(struct skcipher_request *req) +{ + return ccp_des3_crypt(req, true); +} + +static int ccp_des3_decrypt(struct skcipher_request *req) +{ + return ccp_des3_crypt(req, false); +} + +static int ccp_des3_init_tfm(struct crypto_skcipher *tfm) +{ + struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + + ctx->complete = ccp_des3_complete; + ctx->u.des3.key_len = 0; + + crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_des3_req_ctx)); + + return 0; +} + +static const struct skcipher_alg ccp_des3_defaults = { + .setkey = ccp_des3_setkey, + .encrypt = ccp_des3_encrypt, + .decrypt = ccp_des3_decrypt, + .min_keysize = DES3_EDE_KEY_SIZE, + .max_keysize = DES3_EDE_KEY_SIZE, + .init = ccp_des3_init_tfm, + + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK, + .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, + .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_priority = CCP_CRA_PRIORITY, + .base.cra_module = THIS_MODULE, +}; + +struct ccp_des3_def { + enum ccp_des3_mode mode; + unsigned int version; + const char *name; + const char *driver_name; + unsigned int blocksize; + unsigned int ivsize; + const struct skcipher_alg *alg_defaults; +}; + +static const struct ccp_des3_def des3_algs[] = { + { + .mode = CCP_DES3_MODE_ECB, + .version = CCP_VERSION(5, 0), + .name = "ecb(des3_ede)", + .driver_name = "ecb-des3-ccp", + .blocksize = DES3_EDE_BLOCK_SIZE, + .ivsize = 0, + .alg_defaults = &ccp_des3_defaults, + }, + { + .mode = CCP_DES3_MODE_CBC, + .version = CCP_VERSION(5, 0), + .name = "cbc(des3_ede)", + .driver_name = "cbc-des3-ccp", + .blocksize = DES3_EDE_BLOCK_SIZE, + .ivsize = DES3_EDE_BLOCK_SIZE, + .alg_defaults = &ccp_des3_defaults, + }, +}; + +static int ccp_register_des3_alg(struct list_head *head, + const struct ccp_des3_def *def) +{ + struct ccp_crypto_skcipher_alg *ccp_alg; + struct skcipher_alg *alg; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + ccp_alg->mode = def->mode; + + /* Copy the defaults and override as necessary */ + alg = &ccp_alg->alg; + *alg = *def->alg_defaults; + snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->driver_name); + alg->base.cra_blocksize = def->blocksize; + alg->ivsize = def->ivsize; + + ret = crypto_register_skcipher(alg); + if (ret) { + pr_err("%s skcipher algorithm registration error (%d)\n", + alg->base.cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} + +int ccp_register_des3_algs(struct list_head *head) +{ + int i, ret; + unsigned int ccpversion = ccp_version(); + + for (i = 0; i < ARRAY_SIZE(des3_algs); i++) { + if (des3_algs[i].version > ccpversion) + continue; + ret = ccp_register_des3_alg(head, &des3_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c new file mode 100644 index 000000000..88275b486 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -0,0 +1,431 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) crypto API support + * + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/ccp.h> +#include <linux/scatterlist.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/akcipher.h> + +#include "ccp-crypto.h" + +MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.0.0"); +MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support"); + +static unsigned int aes_disable; +module_param(aes_disable, uint, 0444); +MODULE_PARM_DESC(aes_disable, "Disable use of AES - any non-zero value"); + +static unsigned int sha_disable; +module_param(sha_disable, uint, 0444); +MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); + +static unsigned int des3_disable; +module_param(des3_disable, uint, 0444); +MODULE_PARM_DESC(des3_disable, "Disable use of 3DES - any non-zero value"); + +static unsigned int rsa_disable; +module_param(rsa_disable, uint, 0444); +MODULE_PARM_DESC(rsa_disable, "Disable use of RSA - any non-zero value"); + +/* List heads for the supported algorithms */ +static LIST_HEAD(hash_algs); +static LIST_HEAD(skcipher_algs); +static LIST_HEAD(aead_algs); +static LIST_HEAD(akcipher_algs); + +/* For any tfm, requests for that tfm must be returned on the order + * received. With multiple queues available, the CCP can process more + * than one cmd at a time. Therefore we must maintain a cmd list to insure + * the proper ordering of requests on a given tfm. + */ +struct ccp_crypto_queue { + struct list_head cmds; + struct list_head *backlog; + unsigned int cmd_count; +}; + +#define CCP_CRYPTO_MAX_QLEN 100 + +static struct ccp_crypto_queue req_queue; +static spinlock_t req_queue_lock; + +struct ccp_crypto_cmd { + struct list_head entry; + + struct ccp_cmd *cmd; + + /* Save the crypto_tfm and crypto_async_request addresses + * separately to avoid any reference to a possibly invalid + * crypto_async_request structure after invoking the request + * callback + */ + struct crypto_async_request *req; + struct crypto_tfm *tfm; + + /* Used for held command processing to determine state */ + int ret; +}; + +struct ccp_crypto_cpu { + struct work_struct work; + struct completion completion; + struct ccp_crypto_cmd *crypto_cmd; + int err; +}; + +static inline bool ccp_crypto_success(int err) +{ + if (err && (err != -EINPROGRESS) && (err != -EBUSY)) + return false; + + return true; +} + +static struct ccp_crypto_cmd *ccp_crypto_cmd_complete( + struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog) +{ + struct ccp_crypto_cmd *held = NULL, *tmp; + unsigned long flags; + + *backlog = NULL; + + spin_lock_irqsave(&req_queue_lock, flags); + + /* Held cmds will be after the current cmd in the queue so start + * searching for a cmd with a matching tfm for submission. + */ + tmp = crypto_cmd; + list_for_each_entry_continue(tmp, &req_queue.cmds, entry) { + if (crypto_cmd->tfm != tmp->tfm) + continue; + held = tmp; + break; + } + + /* Process the backlog: + * Because cmds can be executed from any point in the cmd list + * special precautions have to be taken when handling the backlog. + */ + if (req_queue.backlog != &req_queue.cmds) { + /* Skip over this cmd if it is the next backlog cmd */ + if (req_queue.backlog == &crypto_cmd->entry) + req_queue.backlog = crypto_cmd->entry.next; + + *backlog = container_of(req_queue.backlog, + struct ccp_crypto_cmd, entry); + req_queue.backlog = req_queue.backlog->next; + + /* Skip over this cmd if it is now the next backlog cmd */ + if (req_queue.backlog == &crypto_cmd->entry) + req_queue.backlog = crypto_cmd->entry.next; + } + + /* Remove the cmd entry from the list of cmds */ + req_queue.cmd_count--; + list_del(&crypto_cmd->entry); + + spin_unlock_irqrestore(&req_queue_lock, flags); + + return held; +} + +static void ccp_crypto_complete(void *data, int err) +{ + struct ccp_crypto_cmd *crypto_cmd = data; + struct ccp_crypto_cmd *held, *next, *backlog; + struct crypto_async_request *req = crypto_cmd->req; + struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm); + int ret; + + if (err == -EINPROGRESS) { + /* Only propagate the -EINPROGRESS if necessary */ + if (crypto_cmd->ret == -EBUSY) { + crypto_cmd->ret = -EINPROGRESS; + req->complete(req, -EINPROGRESS); + } + + return; + } + + /* Operation has completed - update the queue before invoking + * the completion callbacks and retrieve the next cmd (cmd with + * a matching tfm) that can be submitted to the CCP. + */ + held = ccp_crypto_cmd_complete(crypto_cmd, &backlog); + if (backlog) { + backlog->ret = -EINPROGRESS; + backlog->req->complete(backlog->req, -EINPROGRESS); + } + + /* Transition the state from -EBUSY to -EINPROGRESS first */ + if (crypto_cmd->ret == -EBUSY) + req->complete(req, -EINPROGRESS); + + /* Completion callbacks */ + ret = err; + if (ctx->complete) + ret = ctx->complete(req, ret); + req->complete(req, ret); + + /* Submit the next cmd */ + while (held) { + /* Since we have already queued the cmd, we must indicate that + * we can backlog so as not to "lose" this request. + */ + held->cmd->flags |= CCP_CMD_MAY_BACKLOG; + ret = ccp_enqueue_cmd(held->cmd); + if (ccp_crypto_success(ret)) + break; + + /* Error occurred, report it and get the next entry */ + ctx = crypto_tfm_ctx(held->req->tfm); + if (ctx->complete) + ret = ctx->complete(held->req, ret); + held->req->complete(held->req, ret); + + next = ccp_crypto_cmd_complete(held, &backlog); + if (backlog) { + backlog->ret = -EINPROGRESS; + backlog->req->complete(backlog->req, -EINPROGRESS); + } + + kfree(held); + held = next; + } + + kfree(crypto_cmd); +} + +static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) +{ + struct ccp_crypto_cmd *active = NULL, *tmp; + unsigned long flags; + bool free_cmd = true; + int ret; + + spin_lock_irqsave(&req_queue_lock, flags); + + /* Check if the cmd can/should be queued */ + if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { + if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) { + ret = -ENOSPC; + goto e_lock; + } + } + + /* Look for an entry with the same tfm. If there is a cmd + * with the same tfm in the list then the current cmd cannot + * be submitted to the CCP yet. + */ + list_for_each_entry(tmp, &req_queue.cmds, entry) { + if (crypto_cmd->tfm != tmp->tfm) + continue; + active = tmp; + break; + } + + ret = -EINPROGRESS; + if (!active) { + ret = ccp_enqueue_cmd(crypto_cmd->cmd); + if (!ccp_crypto_success(ret)) + goto e_lock; /* Error, don't queue it */ + } + + if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { + ret = -EBUSY; + if (req_queue.backlog == &req_queue.cmds) + req_queue.backlog = &crypto_cmd->entry; + } + crypto_cmd->ret = ret; + + req_queue.cmd_count++; + list_add_tail(&crypto_cmd->entry, &req_queue.cmds); + + free_cmd = false; + +e_lock: + spin_unlock_irqrestore(&req_queue_lock, flags); + + if (free_cmd) + kfree(crypto_cmd); + + return ret; +} + +/** + * ccp_crypto_enqueue_request - queue an crypto async request for processing + * by the CCP + * + * @req: crypto_async_request struct to be processed + * @cmd: ccp_cmd struct to be sent to the CCP + */ +int ccp_crypto_enqueue_request(struct crypto_async_request *req, + struct ccp_cmd *cmd) +{ + struct ccp_crypto_cmd *crypto_cmd; + gfp_t gfp; + + gfp = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; + + crypto_cmd = kzalloc(sizeof(*crypto_cmd), gfp); + if (!crypto_cmd) + return -ENOMEM; + + /* The tfm pointer must be saved and not referenced from the + * crypto_async_request (req) pointer because it is used after + * completion callback for the request and the req pointer + * might not be valid anymore. + */ + crypto_cmd->cmd = cmd; + crypto_cmd->req = req; + crypto_cmd->tfm = req->tfm; + + cmd->callback = ccp_crypto_complete; + cmd->data = crypto_cmd; + + if (req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG) + cmd->flags |= CCP_CMD_MAY_BACKLOG; + else + cmd->flags &= ~CCP_CMD_MAY_BACKLOG; + + return ccp_crypto_enqueue_cmd(crypto_cmd); +} + +struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, + struct scatterlist *sg_add) +{ + struct scatterlist *sg, *sg_last = NULL; + + for (sg = table->sgl; sg; sg = sg_next(sg)) + if (!sg_page(sg)) + break; + if (WARN_ON(!sg)) + return NULL; + + for (; sg && sg_add; sg = sg_next(sg), sg_add = sg_next(sg_add)) { + sg_set_page(sg, sg_page(sg_add), sg_add->length, + sg_add->offset); + sg_last = sg; + } + if (WARN_ON(sg_add)) + return NULL; + + return sg_last; +} + +static int ccp_register_algs(void) +{ + int ret; + + if (!aes_disable) { + ret = ccp_register_aes_algs(&skcipher_algs); + if (ret) + return ret; + + ret = ccp_register_aes_cmac_algs(&hash_algs); + if (ret) + return ret; + + ret = ccp_register_aes_xts_algs(&skcipher_algs); + if (ret) + return ret; + + ret = ccp_register_aes_aeads(&aead_algs); + if (ret) + return ret; + } + + if (!des3_disable) { + ret = ccp_register_des3_algs(&skcipher_algs); + if (ret) + return ret; + } + + if (!sha_disable) { + ret = ccp_register_sha_algs(&hash_algs); + if (ret) + return ret; + } + + if (!rsa_disable) { + ret = ccp_register_rsa_algs(&akcipher_algs); + if (ret) + return ret; + } + + return 0; +} + +static void ccp_unregister_algs(void) +{ + struct ccp_crypto_ahash_alg *ahash_alg, *ahash_tmp; + struct ccp_crypto_skcipher_alg *ablk_alg, *ablk_tmp; + struct ccp_crypto_aead *aead_alg, *aead_tmp; + struct ccp_crypto_akcipher_alg *akc_alg, *akc_tmp; + + list_for_each_entry_safe(ahash_alg, ahash_tmp, &hash_algs, entry) { + crypto_unregister_ahash(&ahash_alg->alg); + list_del(&ahash_alg->entry); + kfree(ahash_alg); + } + + list_for_each_entry_safe(ablk_alg, ablk_tmp, &skcipher_algs, entry) { + crypto_unregister_skcipher(&ablk_alg->alg); + list_del(&ablk_alg->entry); + kfree(ablk_alg); + } + + list_for_each_entry_safe(aead_alg, aead_tmp, &aead_algs, entry) { + crypto_unregister_aead(&aead_alg->alg); + list_del(&aead_alg->entry); + kfree(aead_alg); + } + + list_for_each_entry_safe(akc_alg, akc_tmp, &akcipher_algs, entry) { + crypto_unregister_akcipher(&akc_alg->alg); + list_del(&akc_alg->entry); + kfree(akc_alg); + } +} + +static int ccp_crypto_init(void) +{ + int ret; + + ret = ccp_present(); + if (ret) { + pr_err("Cannot load: there are no available CCPs\n"); + return ret; + } + + spin_lock_init(&req_queue_lock); + INIT_LIST_HEAD(&req_queue.cmds); + req_queue.backlog = &req_queue.cmds; + req_queue.cmd_count = 0; + + ret = ccp_register_algs(); + if (ret) + ccp_unregister_algs(); + + return ret; +} + +static void ccp_crypto_exit(void) +{ + ccp_unregister_algs(); +} + +module_init(ccp_crypto_init); +module_exit(ccp_crypto_exit); diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c new file mode 100644 index 000000000..1223ac70a --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-rsa.c @@ -0,0 +1,293 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) RSA crypto API support + * + * Copyright (C) 2017 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/internal/rsa.h> +#include <crypto/internal/akcipher.h> +#include <crypto/akcipher.h> +#include <crypto/scatterwalk.h> + +#include "ccp-crypto.h" + +static inline struct akcipher_request *akcipher_request_cast( + struct crypto_async_request *req) +{ + return container_of(req, struct akcipher_request, base); +} + +static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen, + const u8 *buf, size_t sz) +{ + int nskip; + + for (nskip = 0; nskip < sz; nskip++) + if (buf[nskip]) + break; + *kplen = sz - nskip; + *kpbuf = kmemdup(buf + nskip, *kplen, GFP_KERNEL); + if (!*kpbuf) + return -ENOMEM; + + return 0; +} + +static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret) +{ + struct akcipher_request *req = akcipher_request_cast(async_req); + struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req); + + if (ret) + return ret; + + req->dst_len = rctx->cmd.u.rsa.key_size >> 3; + + return 0; +} + +static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm) +{ + struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + + return ctx->u.rsa.n_len; +} + +static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req); + int ret = 0; + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_RSA; + + rctx->cmd.u.rsa.key_size = ctx->u.rsa.key_len; /* in bits */ + if (encrypt) { + rctx->cmd.u.rsa.exp = &ctx->u.rsa.e_sg; + rctx->cmd.u.rsa.exp_len = ctx->u.rsa.e_len; + } else { + rctx->cmd.u.rsa.exp = &ctx->u.rsa.d_sg; + rctx->cmd.u.rsa.exp_len = ctx->u.rsa.d_len; + } + rctx->cmd.u.rsa.mod = &ctx->u.rsa.n_sg; + rctx->cmd.u.rsa.mod_len = ctx->u.rsa.n_len; + rctx->cmd.u.rsa.src = req->src; + rctx->cmd.u.rsa.src_len = req->src_len; + rctx->cmd.u.rsa.dst = req->dst; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; +} + +static int ccp_rsa_encrypt(struct akcipher_request *req) +{ + return ccp_rsa_crypt(req, true); +} + +static int ccp_rsa_decrypt(struct akcipher_request *req) +{ + return ccp_rsa_crypt(req, false); +} + +static int ccp_check_key_length(unsigned int len) +{ + /* In bits */ + if (len < 8 || len > 4096) + return -EINVAL; + return 0; +} + +static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx) +{ + /* Clean up old key data */ + kfree_sensitive(ctx->u.rsa.e_buf); + ctx->u.rsa.e_buf = NULL; + ctx->u.rsa.e_len = 0; + kfree_sensitive(ctx->u.rsa.n_buf); + ctx->u.rsa.n_buf = NULL; + ctx->u.rsa.n_len = 0; + kfree_sensitive(ctx->u.rsa.d_buf); + ctx->u.rsa.d_buf = NULL; + ctx->u.rsa.d_len = 0; +} + +static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen, bool private) +{ + struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key; + int ret; + + ccp_rsa_free_key_bufs(ctx); + memset(&raw_key, 0, sizeof(raw_key)); + + /* Code borrowed from crypto/rsa.c */ + if (private) + ret = rsa_parse_priv_key(&raw_key, key, keylen); + else + ret = rsa_parse_pub_key(&raw_key, key, keylen); + if (ret) + goto n_key; + + ret = ccp_copy_and_save_keypart(&ctx->u.rsa.n_buf, &ctx->u.rsa.n_len, + raw_key.n, raw_key.n_sz); + if (ret) + goto key_err; + sg_init_one(&ctx->u.rsa.n_sg, ctx->u.rsa.n_buf, ctx->u.rsa.n_len); + + ctx->u.rsa.key_len = ctx->u.rsa.n_len << 3; /* convert to bits */ + if (ccp_check_key_length(ctx->u.rsa.key_len)) { + ret = -EINVAL; + goto key_err; + } + + ret = ccp_copy_and_save_keypart(&ctx->u.rsa.e_buf, &ctx->u.rsa.e_len, + raw_key.e, raw_key.e_sz); + if (ret) + goto key_err; + sg_init_one(&ctx->u.rsa.e_sg, ctx->u.rsa.e_buf, ctx->u.rsa.e_len); + + if (private) { + ret = ccp_copy_and_save_keypart(&ctx->u.rsa.d_buf, + &ctx->u.rsa.d_len, + raw_key.d, raw_key.d_sz); + if (ret) + goto key_err; + sg_init_one(&ctx->u.rsa.d_sg, + ctx->u.rsa.d_buf, ctx->u.rsa.d_len); + } + + return 0; + +key_err: + ccp_rsa_free_key_bufs(ctx); + +n_key: + return ret; +} + +static int ccp_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + return ccp_rsa_setkey(tfm, key, keylen, true); +} + +static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + return ccp_rsa_setkey(tfm, key, keylen, false); +} + +static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + + akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx)); + ctx->complete = ccp_rsa_complete; + + return 0; +} + +static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(&tfm->base); + + ccp_rsa_free_key_bufs(ctx); +} + +static struct akcipher_alg ccp_rsa_defaults = { + .encrypt = ccp_rsa_encrypt, + .decrypt = ccp_rsa_decrypt, + .set_pub_key = ccp_rsa_setpubkey, + .set_priv_key = ccp_rsa_setprivkey, + .max_size = ccp_rsa_maxsize, + .init = ccp_rsa_init_tfm, + .exit = ccp_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "rsa-ccp", + .cra_priority = CCP_CRA_PRIORITY, + .cra_module = THIS_MODULE, + .cra_ctxsize = 2 * sizeof(struct ccp_ctx), + }, +}; + +struct ccp_rsa_def { + unsigned int version; + const char *name; + const char *driver_name; + unsigned int reqsize; + struct akcipher_alg *alg_defaults; +}; + +static struct ccp_rsa_def rsa_algs[] = { + { + .version = CCP_VERSION(3, 0), + .name = "rsa", + .driver_name = "rsa-ccp", + .reqsize = sizeof(struct ccp_rsa_req_ctx), + .alg_defaults = &ccp_rsa_defaults, + } +}; + +static int ccp_register_rsa_alg(struct list_head *head, + const struct ccp_rsa_def *def) +{ + struct ccp_crypto_akcipher_alg *ccp_alg; + struct akcipher_alg *alg; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + alg = &ccp_alg->alg; + *alg = *def->alg_defaults; + snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->driver_name); + ret = crypto_register_akcipher(alg); + if (ret) { + pr_err("%s akcipher algorithm registration error (%d)\n", + alg->base.cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return 0; +} + +int ccp_register_rsa_algs(struct list_head *head) +{ + int i, ret; + unsigned int ccpversion = ccp_version(); + + /* Register the RSA algorithm in standard mode + * This works for CCP v3 and later + */ + for (i = 0; i < ARRAY_SIZE(rsa_algs); i++) { + if (rsa_algs[i].version > ccpversion) + continue; + ret = ccp_register_rsa_alg(head, &rsa_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c new file mode 100644 index 000000000..8fbfdb9e8 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -0,0 +1,529 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) SHA crypto API support + * + * Copyright (C) 2013,2018 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/crypto.h> +#include <crypto/algapi.h> +#include <crypto/hash.h> +#include <crypto/hmac.h> +#include <crypto/internal/hash.h> +#include <crypto/sha.h> +#include <crypto/scatterwalk.h> +#include <linux/string.h> + +#include "ccp-crypto.h" + +static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) +{ + struct ahash_request *req = ahash_request_cast(async_req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + unsigned int digest_size = crypto_ahash_digestsize(tfm); + + if (ret) + goto e_free; + + if (rctx->hash_rem) { + /* Save remaining data to buffer */ + unsigned int offset = rctx->nbytes - rctx->hash_rem; + + scatterwalk_map_and_copy(rctx->buf, rctx->src, + offset, rctx->hash_rem, 0); + rctx->buf_count = rctx->hash_rem; + } else { + rctx->buf_count = 0; + } + + /* Update result area if supplied */ + if (req->result && rctx->final) + memcpy(req->result, rctx->ctx, digest_size); + +e_free: + sg_free_table(&rctx->data_sg); + + return ret; +} + +static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, + unsigned int final) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct scatterlist *sg; + unsigned int block_size = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + unsigned int sg_count; + gfp_t gfp; + u64 len; + int ret; + + len = (u64)rctx->buf_count + (u64)nbytes; + + if (!final && (len <= block_size)) { + scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src, + 0, nbytes, 0); + rctx->buf_count += nbytes; + + return 0; + } + + rctx->src = req->src; + rctx->nbytes = nbytes; + + rctx->final = final; + rctx->hash_rem = final ? 0 : len & (block_size - 1); + rctx->hash_cnt = len - rctx->hash_rem; + if (!final && !rctx->hash_rem) { + /* CCP can't do zero length final, so keep some data around */ + rctx->hash_cnt -= block_size; + rctx->hash_rem = block_size; + } + + /* Initialize the context scatterlist */ + sg_init_one(&rctx->ctx_sg, rctx->ctx, sizeof(rctx->ctx)); + + sg = NULL; + if (rctx->buf_count && nbytes) { + /* Build the data scatterlist table - allocate enough entries + * for both data pieces (buffer and input data) + */ + gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ? + GFP_KERNEL : GFP_ATOMIC; + sg_count = sg_nents(req->src) + 1; + ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp); + if (ret) + return ret; + + sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); + sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg); + if (!sg) { + ret = -EINVAL; + goto e_free; + } + sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src); + if (!sg) { + ret = -EINVAL; + goto e_free; + } + sg_mark_end(sg); + + sg = rctx->data_sg.sgl; + } else if (rctx->buf_count) { + sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count); + + sg = &rctx->buf_sg; + } else if (nbytes) { + sg = req->src; + } + + rctx->msg_bits += (rctx->hash_cnt << 3); /* Total in bits */ + + memset(&rctx->cmd, 0, sizeof(rctx->cmd)); + INIT_LIST_HEAD(&rctx->cmd.entry); + rctx->cmd.engine = CCP_ENGINE_SHA; + rctx->cmd.u.sha.type = rctx->type; + rctx->cmd.u.sha.ctx = &rctx->ctx_sg; + + switch (rctx->type) { + case CCP_SHA_TYPE_1: + rctx->cmd.u.sha.ctx_len = SHA1_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_224: + rctx->cmd.u.sha.ctx_len = SHA224_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_256: + rctx->cmd.u.sha.ctx_len = SHA256_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_384: + rctx->cmd.u.sha.ctx_len = SHA384_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_512: + rctx->cmd.u.sha.ctx_len = SHA512_DIGEST_SIZE; + break; + default: + /* Should never get here */ + break; + } + + rctx->cmd.u.sha.src = sg; + rctx->cmd.u.sha.src_len = rctx->hash_cnt; + rctx->cmd.u.sha.opad = ctx->u.sha.key_len ? + &ctx->u.sha.opad_sg : NULL; + rctx->cmd.u.sha.opad_len = ctx->u.sha.key_len ? + ctx->u.sha.opad_count : 0; + rctx->cmd.u.sha.first = rctx->first; + rctx->cmd.u.sha.final = rctx->final; + rctx->cmd.u.sha.msg_bits = rctx->msg_bits; + + rctx->first = 0; + + ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd); + + return ret; + +e_free: + sg_free_table(&rctx->data_sg); + + return ret; +} + +static int ccp_sha_init(struct ahash_request *req) +{ + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_crypto_ahash_alg *alg = + ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); + unsigned int block_size = + crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + + memset(rctx, 0, sizeof(*rctx)); + + rctx->type = alg->type; + rctx->first = 1; + + if (ctx->u.sha.key_len) { + /* Buffer the HMAC key for first update */ + memcpy(rctx->buf, ctx->u.sha.ipad, block_size); + rctx->buf_count = block_size; + } + + return 0; +} + +static int ccp_sha_update(struct ahash_request *req) +{ + return ccp_do_sha_update(req, req->nbytes, 0); +} + +static int ccp_sha_final(struct ahash_request *req) +{ + return ccp_do_sha_update(req, 0, 1); +} + +static int ccp_sha_finup(struct ahash_request *req) +{ + return ccp_do_sha_update(req, req->nbytes, 1); +} + +static int ccp_sha_digest(struct ahash_request *req) +{ + int ret; + + ret = ccp_sha_init(req); + if (ret) + return ret; + + return ccp_sha_finup(req); +} + +static int ccp_sha_export(struct ahash_request *req, void *out) +{ + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_exp_ctx state; + + /* Don't let anything leak to 'out' */ + memset(&state, 0, sizeof(state)); + + state.type = rctx->type; + state.msg_bits = rctx->msg_bits; + state.first = rctx->first; + memcpy(state.ctx, rctx->ctx, sizeof(state.ctx)); + state.buf_count = rctx->buf_count; + memcpy(state.buf, rctx->buf, sizeof(state.buf)); + + /* 'out' may not be aligned so memcpy from local variable */ + memcpy(out, &state, sizeof(state)); + + return 0; +} + +static int ccp_sha_import(struct ahash_request *req, const void *in) +{ + struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_exp_ctx state; + + /* 'in' may not be aligned so memcpy to local variable */ + memcpy(&state, in, sizeof(state)); + + memset(rctx, 0, sizeof(*rctx)); + rctx->type = state.type; + rctx->msg_bits = state.msg_bits; + rctx->first = state.first; + memcpy(rctx->ctx, state.ctx, sizeof(rctx->ctx)); + rctx->buf_count = state.buf_count; + memcpy(rctx->buf, state.buf, sizeof(rctx->buf)); + + return 0; +} + +static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int key_len) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct crypto_shash *shash = ctx->u.sha.hmac_tfm; + unsigned int block_size = crypto_shash_blocksize(shash); + unsigned int digest_size = crypto_shash_digestsize(shash); + int i, ret; + + /* Set to zero until complete */ + ctx->u.sha.key_len = 0; + + /* Clear key area to provide zero padding for keys smaller + * than the block size + */ + memset(ctx->u.sha.key, 0, sizeof(ctx->u.sha.key)); + + if (key_len > block_size) { + /* Must hash the input key */ + ret = crypto_shash_tfm_digest(shash, key, key_len, + ctx->u.sha.key); + if (ret) + return -EINVAL; + + key_len = digest_size; + } else { + memcpy(ctx->u.sha.key, key, key_len); + } + + for (i = 0; i < block_size; i++) { + ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ HMAC_IPAD_VALUE; + ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ HMAC_OPAD_VALUE; + } + + sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size); + ctx->u.sha.opad_count = block_size; + + ctx->u.sha.key_len = key_len; + + return 0; +} + +static int ccp_sha_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + + ctx->complete = ccp_sha_complete; + ctx->u.sha.key_len = 0; + + crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx)); + + return 0; +} + +static void ccp_sha_cra_exit(struct crypto_tfm *tfm) +{ +} + +static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm); + struct crypto_shash *hmac_tfm; + + hmac_tfm = crypto_alloc_shash(alg->child_alg, 0, 0); + if (IS_ERR(hmac_tfm)) { + pr_warn("could not load driver %s need for HMAC support\n", + alg->child_alg); + return PTR_ERR(hmac_tfm); + } + + ctx->u.sha.hmac_tfm = hmac_tfm; + + return ccp_sha_cra_init(tfm); +} + +static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm) +{ + struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->u.sha.hmac_tfm) + crypto_free_shash(ctx->u.sha.hmac_tfm); + + ccp_sha_cra_exit(tfm); +} + +struct ccp_sha_def { + unsigned int version; + const char *name; + const char *drv_name; + enum ccp_sha_type type; + u32 digest_size; + u32 block_size; +}; + +static struct ccp_sha_def sha_algs[] = { + { + .version = CCP_VERSION(3, 0), + .name = "sha1", + .drv_name = "sha1-ccp", + .type = CCP_SHA_TYPE_1, + .digest_size = SHA1_DIGEST_SIZE, + .block_size = SHA1_BLOCK_SIZE, + }, + { + .version = CCP_VERSION(3, 0), + .name = "sha224", + .drv_name = "sha224-ccp", + .type = CCP_SHA_TYPE_224, + .digest_size = SHA224_DIGEST_SIZE, + .block_size = SHA224_BLOCK_SIZE, + }, + { + .version = CCP_VERSION(3, 0), + .name = "sha256", + .drv_name = "sha256-ccp", + .type = CCP_SHA_TYPE_256, + .digest_size = SHA256_DIGEST_SIZE, + .block_size = SHA256_BLOCK_SIZE, + }, + { + .version = CCP_VERSION(5, 0), + .name = "sha384", + .drv_name = "sha384-ccp", + .type = CCP_SHA_TYPE_384, + .digest_size = SHA384_DIGEST_SIZE, + .block_size = SHA384_BLOCK_SIZE, + }, + { + .version = CCP_VERSION(5, 0), + .name = "sha512", + .drv_name = "sha512-ccp", + .type = CCP_SHA_TYPE_512, + .digest_size = SHA512_DIGEST_SIZE, + .block_size = SHA512_BLOCK_SIZE, + }, +}; + +static int ccp_register_hmac_alg(struct list_head *head, + const struct ccp_sha_def *def, + const struct ccp_crypto_ahash_alg *base_alg) +{ + struct ccp_crypto_ahash_alg *ccp_alg; + struct ahash_alg *alg; + struct hash_alg_common *halg; + struct crypto_alg *base; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + /* Copy the base algorithm and only change what's necessary */ + *ccp_alg = *base_alg; + INIT_LIST_HEAD(&ccp_alg->entry); + + strscpy(ccp_alg->child_alg, def->name, CRYPTO_MAX_ALG_NAME); + + alg = &ccp_alg->alg; + alg->setkey = ccp_sha_setkey; + + halg = &alg->halg; + + base = &halg->base; + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", def->name); + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s", + def->drv_name); + base->cra_init = ccp_hmac_sha_cra_init; + base->cra_exit = ccp_hmac_sha_cra_exit; + + ret = crypto_register_ahash(alg); + if (ret) { + pr_err("%s ahash algorithm registration error (%d)\n", + base->cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + return ret; +} + +static int ccp_register_sha_alg(struct list_head *head, + const struct ccp_sha_def *def) +{ + struct ccp_crypto_ahash_alg *ccp_alg; + struct ahash_alg *alg; + struct hash_alg_common *halg; + struct crypto_alg *base; + int ret; + + ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL); + if (!ccp_alg) + return -ENOMEM; + + INIT_LIST_HEAD(&ccp_alg->entry); + + ccp_alg->type = def->type; + + alg = &ccp_alg->alg; + alg->init = ccp_sha_init; + alg->update = ccp_sha_update; + alg->final = ccp_sha_final; + alg->finup = ccp_sha_finup; + alg->digest = ccp_sha_digest; + alg->export = ccp_sha_export; + alg->import = ccp_sha_import; + + halg = &alg->halg; + halg->digestsize = def->digest_size; + halg->statesize = sizeof(struct ccp_sha_exp_ctx); + + base = &halg->base; + snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name); + snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", + def->drv_name); + base->cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_NEED_FALLBACK; + base->cra_blocksize = def->block_size; + base->cra_ctxsize = sizeof(struct ccp_ctx); + base->cra_priority = CCP_CRA_PRIORITY; + base->cra_init = ccp_sha_cra_init; + base->cra_exit = ccp_sha_cra_exit; + base->cra_module = THIS_MODULE; + + ret = crypto_register_ahash(alg); + if (ret) { + pr_err("%s ahash algorithm registration error (%d)\n", + base->cra_name, ret); + kfree(ccp_alg); + return ret; + } + + list_add(&ccp_alg->entry, head); + + ret = ccp_register_hmac_alg(head, def, ccp_alg); + + return ret; +} + +int ccp_register_sha_algs(struct list_head *head) +{ + int i, ret; + unsigned int ccpversion = ccp_version(); + + for (i = 0; i < ARRAY_SIZE(sha_algs); i++) { + if (sha_algs[i].version > ccpversion) + continue; + ret = ccp_register_sha_alg(head, &sha_algs[i]); + if (ret) + return ret; + } + + return 0; +} diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h new file mode 100644 index 000000000..aed3d2192 --- /dev/null +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Cryptographic Coprocessor (CCP) crypto API support + * + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#ifndef __CCP_CRYPTO_H__ +#define __CCP_CRYPTO_H__ + +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/ccp.h> +#include <crypto/algapi.h> +#include <crypto/aes.h> +#include <crypto/internal/aead.h> +#include <crypto/aead.h> +#include <crypto/ctr.h> +#include <crypto/hash.h> +#include <crypto/sha.h> +#include <crypto/akcipher.h> +#include <crypto/skcipher.h> +#include <crypto/internal/rsa.h> + +/* We want the module name in front of our messages */ +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define CCP_LOG_LEVEL KERN_INFO + +#define CCP_CRA_PRIORITY 300 + +struct ccp_crypto_skcipher_alg { + struct list_head entry; + + u32 mode; + + struct skcipher_alg alg; +}; + +struct ccp_crypto_aead { + struct list_head entry; + + u32 mode; + + struct aead_alg alg; +}; + +struct ccp_crypto_ahash_alg { + struct list_head entry; + + const __be32 *init; + u32 type; + u32 mode; + + /* Child algorithm used for HMAC, CMAC, etc */ + char child_alg[CRYPTO_MAX_ALG_NAME]; + + struct ahash_alg alg; +}; + +struct ccp_crypto_akcipher_alg { + struct list_head entry; + + struct akcipher_alg alg; +}; + +static inline struct ccp_crypto_skcipher_alg * + ccp_crypto_skcipher_alg(struct crypto_skcipher *tfm) +{ + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + + return container_of(alg, struct ccp_crypto_skcipher_alg, alg); +} + +static inline struct ccp_crypto_ahash_alg * + ccp_crypto_ahash_alg(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct ahash_alg *ahash_alg; + + ahash_alg = container_of(alg, struct ahash_alg, halg.base); + + return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg); +} + +/***** AES related defines *****/ +struct ccp_aes_ctx { + /* Fallback cipher for XTS with unsupported unit sizes */ + struct crypto_skcipher *tfm_skcipher; + + enum ccp_engine engine; + enum ccp_aes_type type; + enum ccp_aes_mode mode; + + struct scatterlist key_sg; + unsigned int key_len; + u8 key[AES_MAX_KEY_SIZE * 2]; + + u8 nonce[CTR_RFC3686_NONCE_SIZE]; + + /* CMAC key structures */ + struct scatterlist k1_sg; + struct scatterlist k2_sg; + unsigned int kn_len; + u8 k1[AES_BLOCK_SIZE]; + u8 k2[AES_BLOCK_SIZE]; +}; + +struct ccp_aes_req_ctx { + struct scatterlist iv_sg; + u8 iv[AES_BLOCK_SIZE]; + + struct scatterlist tag_sg; + u8 tag[AES_BLOCK_SIZE]; + + /* Fields used for RFC3686 requests */ + u8 *rfc3686_info; + u8 rfc3686_iv[AES_BLOCK_SIZE]; + + struct ccp_cmd cmd; + + struct skcipher_request fallback_req; // keep at the end +}; + +struct ccp_aes_cmac_req_ctx { + unsigned int null_msg; + unsigned int final; + + struct scatterlist *src; + unsigned int nbytes; + + u64 hash_cnt; + unsigned int hash_rem; + + struct sg_table data_sg; + + struct scatterlist iv_sg; + u8 iv[AES_BLOCK_SIZE]; + + struct scatterlist buf_sg; + unsigned int buf_count; + u8 buf[AES_BLOCK_SIZE]; + + struct scatterlist pad_sg; + unsigned int pad_count; + u8 pad[AES_BLOCK_SIZE]; + + struct ccp_cmd cmd; +}; + +struct ccp_aes_cmac_exp_ctx { + unsigned int null_msg; + + u8 iv[AES_BLOCK_SIZE]; + + unsigned int buf_count; + u8 buf[AES_BLOCK_SIZE]; +}; + +/***** 3DES related defines *****/ +struct ccp_des3_ctx { + enum ccp_engine engine; + enum ccp_des3_type type; + enum ccp_des3_mode mode; + + struct scatterlist key_sg; + unsigned int key_len; + u8 key[AES_MAX_KEY_SIZE]; +}; + +struct ccp_des3_req_ctx { + struct scatterlist iv_sg; + u8 iv[AES_BLOCK_SIZE]; + + struct ccp_cmd cmd; +}; + +/* SHA-related defines + * These values must be large enough to accommodate any variant + */ +#define MAX_SHA_CONTEXT_SIZE SHA512_DIGEST_SIZE +#define MAX_SHA_BLOCK_SIZE SHA512_BLOCK_SIZE + +struct ccp_sha_ctx { + struct scatterlist opad_sg; + unsigned int opad_count; + + unsigned int key_len; + u8 key[MAX_SHA_BLOCK_SIZE]; + u8 ipad[MAX_SHA_BLOCK_SIZE]; + u8 opad[MAX_SHA_BLOCK_SIZE]; + struct crypto_shash *hmac_tfm; +}; + +struct ccp_sha_req_ctx { + enum ccp_sha_type type; + + u64 msg_bits; + + unsigned int first; + unsigned int final; + + struct scatterlist *src; + unsigned int nbytes; + + u64 hash_cnt; + unsigned int hash_rem; + + struct sg_table data_sg; + + struct scatterlist ctx_sg; + u8 ctx[MAX_SHA_CONTEXT_SIZE]; + + struct scatterlist buf_sg; + unsigned int buf_count; + u8 buf[MAX_SHA_BLOCK_SIZE]; + + /* CCP driver command */ + struct ccp_cmd cmd; +}; + +struct ccp_sha_exp_ctx { + enum ccp_sha_type type; + + u64 msg_bits; + + unsigned int first; + + u8 ctx[MAX_SHA_CONTEXT_SIZE]; + + unsigned int buf_count; + u8 buf[MAX_SHA_BLOCK_SIZE]; +}; + +/***** RSA related defines *****/ + +struct ccp_rsa_ctx { + unsigned int key_len; /* in bits */ + struct scatterlist e_sg; + u8 *e_buf; + unsigned int e_len; + struct scatterlist n_sg; + u8 *n_buf; + unsigned int n_len; + struct scatterlist d_sg; + u8 *d_buf; + unsigned int d_len; +}; + +struct ccp_rsa_req_ctx { + struct ccp_cmd cmd; +}; + +#define CCP_RSA_MAXMOD (4 * 1024 / 8) +#define CCP5_RSA_MAXMOD (16 * 1024 / 8) + +/***** Common Context Structure *****/ +struct ccp_ctx { + int (*complete)(struct crypto_async_request *req, int ret); + + union { + struct ccp_aes_ctx aes; + struct ccp_rsa_ctx rsa; + struct ccp_sha_ctx sha; + struct ccp_des3_ctx des3; + } u; +}; + +int ccp_crypto_enqueue_request(struct crypto_async_request *req, + struct ccp_cmd *cmd); +struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, + struct scatterlist *sg_add); + +int ccp_register_aes_algs(struct list_head *head); +int ccp_register_aes_cmac_algs(struct list_head *head); +int ccp_register_aes_xts_algs(struct list_head *head); +int ccp_register_aes_aeads(struct list_head *head); +int ccp_register_sha_algs(struct list_head *head); +int ccp_register_des3_algs(struct list_head *head); +int ccp_register_rsa_algs(struct list_head *head); + +#endif diff --git a/drivers/crypto/ccp/ccp-debugfs.c b/drivers/crypto/ccp/ccp-debugfs.c new file mode 100644 index 000000000..a1055554b --- /dev/null +++ b/drivers/crypto/ccp/ccp-debugfs.c @@ -0,0 +1,323 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2017 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/debugfs.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" + +/* DebugFS helpers */ +#define OBUFP (obuf + oboff) +#define OBUFLEN 512 +#define OBUFSPC (OBUFLEN - oboff) +#define OSCNPRINTF(fmt, ...) \ + scnprintf(OBUFP, OBUFSPC, fmt, ## __VA_ARGS__) + +#define BUFLEN 63 + +#define RI_VERSION_NUM 0x0000003F +#define RI_AES_PRESENT 0x00000040 +#define RI_3DES_PRESENT 0x00000080 +#define RI_SHA_PRESENT 0x00000100 +#define RI_RSA_PRESENT 0x00000200 +#define RI_ECC_PRESENT 0x00000400 +#define RI_ZDE_PRESENT 0x00000800 +#define RI_ZCE_PRESENT 0x00001000 +#define RI_TRNG_PRESENT 0x00002000 +#define RI_ELFC_PRESENT 0x00004000 +#define RI_ELFC_SHIFT 14 +#define RI_NUM_VQM 0x00078000 +#define RI_NVQM_SHIFT 15 +#define RI_NVQM(r) (((r) * RI_NUM_VQM) >> RI_NVQM_SHIFT) +#define RI_LSB_ENTRIES 0x0FF80000 +#define RI_NLSB_SHIFT 19 +#define RI_NLSB(r) (((r) * RI_LSB_ENTRIES) >> RI_NLSB_SHIFT) + +static ssize_t ccp5_debugfs_info_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct ccp_device *ccp = filp->private_data; + unsigned int oboff = 0; + unsigned int regval; + ssize_t ret; + char *obuf; + + if (!ccp) + return 0; + + obuf = kmalloc(OBUFLEN, GFP_KERNEL); + if (!obuf) + return -ENOMEM; + + oboff += OSCNPRINTF("Device name: %s\n", ccp->name); + oboff += OSCNPRINTF(" RNG name: %s\n", ccp->rngname); + oboff += OSCNPRINTF(" # Queues: %d\n", ccp->cmd_q_count); + oboff += OSCNPRINTF(" # Cmds: %d\n", ccp->cmd_count); + + regval = ioread32(ccp->io_regs + CMD5_PSP_CCP_VERSION); + oboff += OSCNPRINTF(" Version: %d\n", regval & RI_VERSION_NUM); + oboff += OSCNPRINTF(" Engines:"); + if (regval & RI_AES_PRESENT) + oboff += OSCNPRINTF(" AES"); + if (regval & RI_3DES_PRESENT) + oboff += OSCNPRINTF(" 3DES"); + if (regval & RI_SHA_PRESENT) + oboff += OSCNPRINTF(" SHA"); + if (regval & RI_RSA_PRESENT) + oboff += OSCNPRINTF(" RSA"); + if (regval & RI_ECC_PRESENT) + oboff += OSCNPRINTF(" ECC"); + if (regval & RI_ZDE_PRESENT) + oboff += OSCNPRINTF(" ZDE"); + if (regval & RI_ZCE_PRESENT) + oboff += OSCNPRINTF(" ZCE"); + if (regval & RI_TRNG_PRESENT) + oboff += OSCNPRINTF(" TRNG"); + oboff += OSCNPRINTF("\n"); + oboff += OSCNPRINTF(" Queues: %d\n", + (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT); + oboff += OSCNPRINTF("LSB Entries: %d\n", + (regval & RI_LSB_ENTRIES) >> RI_NLSB_SHIFT); + + ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff); + kfree(obuf); + + return ret; +} + +/* Return a formatted buffer containing the current + * statistics across all queues for a CCP. + */ +static ssize_t ccp5_debugfs_stats_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct ccp_device *ccp = filp->private_data; + unsigned long total_xts_aes_ops = 0; + unsigned long total_3des_ops = 0; + unsigned long total_aes_ops = 0; + unsigned long total_sha_ops = 0; + unsigned long total_rsa_ops = 0; + unsigned long total_ecc_ops = 0; + unsigned long total_pt_ops = 0; + unsigned long total_ops = 0; + unsigned int oboff = 0; + ssize_t ret = 0; + unsigned int i; + char *obuf; + + for (i = 0; i < ccp->cmd_q_count; i++) { + struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i]; + + total_ops += cmd_q->total_ops; + total_aes_ops += cmd_q->total_aes_ops; + total_xts_aes_ops += cmd_q->total_xts_aes_ops; + total_3des_ops += cmd_q->total_3des_ops; + total_sha_ops += cmd_q->total_sha_ops; + total_rsa_ops += cmd_q->total_rsa_ops; + total_pt_ops += cmd_q->total_pt_ops; + total_ecc_ops += cmd_q->total_ecc_ops; + } + + obuf = kmalloc(OBUFLEN, GFP_KERNEL); + if (!obuf) + return -ENOMEM; + + oboff += OSCNPRINTF("Total Interrupts Handled: %ld\n", + ccp->total_interrupts); + oboff += OSCNPRINTF(" Total Operations: %ld\n", + total_ops); + oboff += OSCNPRINTF(" AES: %ld\n", + total_aes_ops); + oboff += OSCNPRINTF(" XTS AES: %ld\n", + total_xts_aes_ops); + oboff += OSCNPRINTF(" SHA: %ld\n", + total_3des_ops); + oboff += OSCNPRINTF(" SHA: %ld\n", + total_sha_ops); + oboff += OSCNPRINTF(" RSA: %ld\n", + total_rsa_ops); + oboff += OSCNPRINTF(" Pass-Thru: %ld\n", + total_pt_ops); + oboff += OSCNPRINTF(" ECC: %ld\n", + total_ecc_ops); + + ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff); + kfree(obuf); + + return ret; +} + +/* Reset the counters in a queue + */ +static void ccp5_debugfs_reset_queue_stats(struct ccp_cmd_queue *cmd_q) +{ + cmd_q->total_ops = 0L; + cmd_q->total_aes_ops = 0L; + cmd_q->total_xts_aes_ops = 0L; + cmd_q->total_3des_ops = 0L; + cmd_q->total_sha_ops = 0L; + cmd_q->total_rsa_ops = 0L; + cmd_q->total_pt_ops = 0L; + cmd_q->total_ecc_ops = 0L; +} + +/* A value was written to the stats variable, which + * should be used to reset the queue counters across + * that device. + */ +static ssize_t ccp5_debugfs_stats_write(struct file *filp, + const char __user *ubuf, + size_t count, loff_t *offp) +{ + struct ccp_device *ccp = filp->private_data; + int i; + + for (i = 0; i < ccp->cmd_q_count; i++) + ccp5_debugfs_reset_queue_stats(&ccp->cmd_q[i]); + ccp->total_interrupts = 0L; + + return count; +} + +/* Return a formatted buffer containing the current information + * for that queue + */ +static ssize_t ccp5_debugfs_queue_read(struct file *filp, char __user *ubuf, + size_t count, loff_t *offp) +{ + struct ccp_cmd_queue *cmd_q = filp->private_data; + unsigned int oboff = 0; + unsigned int regval; + ssize_t ret; + char *obuf; + + if (!cmd_q) + return 0; + + obuf = kmalloc(OBUFLEN, GFP_KERNEL); + if (!obuf) + return -ENOMEM; + + oboff += OSCNPRINTF(" Total Queue Operations: %ld\n", + cmd_q->total_ops); + oboff += OSCNPRINTF(" AES: %ld\n", + cmd_q->total_aes_ops); + oboff += OSCNPRINTF(" XTS AES: %ld\n", + cmd_q->total_xts_aes_ops); + oboff += OSCNPRINTF(" SHA: %ld\n", + cmd_q->total_3des_ops); + oboff += OSCNPRINTF(" SHA: %ld\n", + cmd_q->total_sha_ops); + oboff += OSCNPRINTF(" RSA: %ld\n", + cmd_q->total_rsa_ops); + oboff += OSCNPRINTF(" Pass-Thru: %ld\n", + cmd_q->total_pt_ops); + oboff += OSCNPRINTF(" ECC: %ld\n", + cmd_q->total_ecc_ops); + + regval = ioread32(cmd_q->reg_int_enable); + oboff += OSCNPRINTF(" Enabled Interrupts:"); + if (regval & INT_EMPTY_QUEUE) + oboff += OSCNPRINTF(" EMPTY"); + if (regval & INT_QUEUE_STOPPED) + oboff += OSCNPRINTF(" STOPPED"); + if (regval & INT_ERROR) + oboff += OSCNPRINTF(" ERROR"); + if (regval & INT_COMPLETION) + oboff += OSCNPRINTF(" COMPLETION"); + oboff += OSCNPRINTF("\n"); + + ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff); + kfree(obuf); + + return ret; +} + +/* A value was written to the stats variable for a + * queue. Reset the queue counters to this value. + */ +static ssize_t ccp5_debugfs_queue_write(struct file *filp, + const char __user *ubuf, + size_t count, loff_t *offp) +{ + struct ccp_cmd_queue *cmd_q = filp->private_data; + + ccp5_debugfs_reset_queue_stats(cmd_q); + + return count; +} + +static const struct file_operations ccp_debugfs_info_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = ccp5_debugfs_info_read, + .write = NULL, +}; + +static const struct file_operations ccp_debugfs_queue_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = ccp5_debugfs_queue_read, + .write = ccp5_debugfs_queue_write, +}; + +static const struct file_operations ccp_debugfs_stats_ops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = ccp5_debugfs_stats_read, + .write = ccp5_debugfs_stats_write, +}; + +static struct dentry *ccp_debugfs_dir; +static DEFINE_MUTEX(ccp_debugfs_lock); + +#define MAX_NAME_LEN 20 + +void ccp5_debugfs_setup(struct ccp_device *ccp) +{ + struct ccp_cmd_queue *cmd_q; + char name[MAX_NAME_LEN + 1]; + struct dentry *debugfs_q_instance; + int i; + + if (!debugfs_initialized()) + return; + + mutex_lock(&ccp_debugfs_lock); + if (!ccp_debugfs_dir) + ccp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL); + mutex_unlock(&ccp_debugfs_lock); + + ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir); + + debugfs_create_file("info", 0400, ccp->debugfs_instance, ccp, + &ccp_debugfs_info_ops); + + debugfs_create_file("stats", 0600, ccp->debugfs_instance, ccp, + &ccp_debugfs_stats_ops); + + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + snprintf(name, MAX_NAME_LEN - 1, "q%d", cmd_q->id); + + debugfs_q_instance = + debugfs_create_dir(name, ccp->debugfs_instance); + + debugfs_create_file("stats", 0600, debugfs_q_instance, cmd_q, + &ccp_debugfs_queue_ops); + } + + return; +} + +void ccp5_debugfs_destroy(void) +{ + debugfs_remove_recursive(ccp_debugfs_dir); +} diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c new file mode 100644 index 000000000..0d5576f6a --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev-v3.c @@ -0,0 +1,598 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/interrupt.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" + +static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count) +{ + int start; + struct ccp_device *ccp = cmd_q->ccp; + + for (;;) { + mutex_lock(&ccp->sb_mutex); + + start = (u32)bitmap_find_next_zero_area(ccp->sb, + ccp->sb_count, + ccp->sb_start, + count, 0); + if (start <= ccp->sb_count) { + bitmap_set(ccp->sb, start, count); + + mutex_unlock(&ccp->sb_mutex); + break; + } + + ccp->sb_avail = 0; + + mutex_unlock(&ccp->sb_mutex); + + /* Wait for KSB entries to become available */ + if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail)) + return 0; + } + + return KSB_START + start; +} + +static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start, + unsigned int count) +{ + struct ccp_device *ccp = cmd_q->ccp; + + if (!start) + return; + + mutex_lock(&ccp->sb_mutex); + + bitmap_clear(ccp->sb, start - KSB_START, count); + + ccp->sb_avail = 1; + + mutex_unlock(&ccp->sb_mutex); + + wake_up_interruptible_all(&ccp->sb_queue); +} + +static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q) +{ + return CMD_Q_DEPTH(ioread32(cmd_q->reg_status)); +} + +static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count) +{ + struct ccp_cmd_queue *cmd_q = op->cmd_q; + struct ccp_device *ccp = cmd_q->ccp; + void __iomem *cr_addr; + u32 cr0, cmd; + unsigned int i; + int ret = 0; + + /* We could read a status register to see how many free slots + * are actually available, but reading that register resets it + * and you could lose some error information. + */ + cmd_q->free_slots--; + + cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT) + | (op->jobid << REQ0_JOBID_SHIFT) + | REQ0_WAIT_FOR_WRITE; + + if (op->soc) + cr0 |= REQ0_STOP_ON_COMPLETE + | REQ0_INT_ON_COMPLETE; + + if (op->ioc || !cmd_q->free_slots) + cr0 |= REQ0_INT_ON_COMPLETE; + + /* Start at CMD_REQ1 */ + cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR; + + mutex_lock(&ccp->req_mutex); + + /* Write CMD_REQ1 through CMD_REQx first */ + for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR) + iowrite32(*(cr + i), cr_addr); + + /* Tell the CCP to start */ + wmb(); + iowrite32(cr0, ccp->io_regs + CMD_REQ0); + + mutex_unlock(&ccp->req_mutex); + + if (cr0 & REQ0_INT_ON_COMPLETE) { + /* Wait for the job to complete */ + ret = wait_event_interruptible(cmd_q->int_queue, + cmd_q->int_rcvd); + if (ret || cmd_q->cmd_error) { + /* On error delete all related jobs from the queue */ + cmd = (cmd_q->id << DEL_Q_ID_SHIFT) + | op->jobid; + if (cmd_q->cmd_error) + ccp_log_error(cmd_q->ccp, + cmd_q->cmd_error); + + iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); + + if (!ret) + ret = -EIO; + } else if (op->soc) { + /* Delete just head job from the queue on SoC */ + cmd = DEL_Q_ACTIVE + | (cmd_q->id << DEL_Q_ID_SHIFT) + | op->jobid; + + iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB); + } + + cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status); + + cmd_q->int_rcvd = 0; + } + + return ret; +} + +static int ccp_perform_aes(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT) + | (op->u.aes.type << REQ1_AES_TYPE_SHIFT) + | (op->u.aes.mode << REQ1_AES_MODE_SHIFT) + | (op->u.aes.action << REQ1_AES_ACTION_SHIFT) + | (op->sb_key << REQ1_KEY_KSB_SHIFT); + cr[1] = op->src.u.dma.length - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) + | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + + if (op->u.aes.mode == CCP_AES_MODE_CFB) + cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT); + + if (op->eom) + cr[0] |= REQ1_EOM; + + if (op->init) + cr[0] |= REQ1_INIT; + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_xts_aes(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT) + | (op->u.xts.action << REQ1_AES_ACTION_SHIFT) + | (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT) + | (op->sb_key << REQ1_KEY_KSB_SHIFT); + cr[1] = op->src.u.dma.length - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) + | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + + if (op->eom) + cr[0] |= REQ1_EOM; + + if (op->init) + cr[0] |= REQ1_INIT; + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_sha(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT) + | (op->u.sha.type << REQ1_SHA_TYPE_SHIFT) + | REQ1_INIT; + cr[1] = op->src.u.dma.length - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) + | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + + if (op->eom) { + cr[0] |= REQ1_EOM; + cr[4] = lower_32_bits(op->u.sha.msg_bits); + cr[5] = upper_32_bits(op->u.sha.msg_bits); + } else { + cr[4] = 0; + cr[5] = 0; + } + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_rsa(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT) + | (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT) + | (op->sb_key << REQ1_KEY_KSB_SHIFT) + | REQ1_EOM; + cr[1] = op->u.rsa.input_len - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) + | (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_passthru(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT) + | (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT) + | (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT); + + if (op->src.type == CCP_MEMTYPE_SYSTEM) + cr[1] = op->src.u.dma.length - 1; + else + cr[1] = op->dst.u.dma.length - 1; + + if (op->src.type == CCP_MEMTYPE_SYSTEM) { + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + + if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) + cr[3] |= (op->sb_key << REQ4_KSB_SHIFT); + } else { + cr[2] = op->src.u.sb * CCP_SB_BYTES; + cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT); + } + + if (op->dst.type == CCP_MEMTYPE_SYSTEM) { + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + } else { + cr[4] = op->dst.u.sb * CCP_SB_BYTES; + cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT); + } + + if (op->eom) + cr[0] |= REQ1_EOM; + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static int ccp_perform_ecc(struct ccp_op *op) +{ + u32 cr[6]; + + /* Fill out the register contents for REQ1 through REQ6 */ + cr[0] = REQ1_ECC_AFFINE_CONVERT + | (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT) + | (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT) + | REQ1_EOM; + cr[1] = op->src.u.dma.length - 1; + cr[2] = ccp_addr_lo(&op->src.u.dma); + cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->src.u.dma); + cr[4] = ccp_addr_lo(&op->dst.u.dma); + cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) + | ccp_addr_hi(&op->dst.u.dma); + + return ccp_do_cmd(op, cr, ARRAY_SIZE(cr)); +} + +static void ccp_disable_queue_interrupts(struct ccp_device *ccp) +{ + iowrite32(0x00, ccp->io_regs + IRQ_MASK_REG); +} + +static void ccp_enable_queue_interrupts(struct ccp_device *ccp) +{ + iowrite32(ccp->qim, ccp->io_regs + IRQ_MASK_REG); +} + +static void ccp_irq_bh(unsigned long data) +{ + struct ccp_device *ccp = (struct ccp_device *)data; + struct ccp_cmd_queue *cmd_q; + u32 q_int, status; + unsigned int i; + + status = ioread32(ccp->io_regs + IRQ_STATUS_REG); + + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + q_int = status & (cmd_q->int_ok | cmd_q->int_err); + if (q_int) { + cmd_q->int_status = status; + cmd_q->q_status = ioread32(cmd_q->reg_status); + cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); + + /* On error, only save the first error value */ + if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error) + cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); + + cmd_q->int_rcvd = 1; + + /* Acknowledge the interrupt and wake the kthread */ + iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG); + wake_up_interruptible(&cmd_q->int_queue); + } + } + ccp_enable_queue_interrupts(ccp); +} + +static irqreturn_t ccp_irq_handler(int irq, void *data) +{ + struct ccp_device *ccp = (struct ccp_device *)data; + + ccp_disable_queue_interrupts(ccp); + if (ccp->use_tasklet) + tasklet_schedule(&ccp->irq_tasklet); + else + ccp_irq_bh((unsigned long)ccp); + + return IRQ_HANDLED; +} + +static int ccp_init(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct ccp_cmd_queue *cmd_q; + struct dma_pool *dma_pool; + char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; + unsigned int qmr, i; + int ret; + + /* Find available queues */ + ccp->qim = 0; + qmr = ioread32(ccp->io_regs + Q_MASK_REG); + for (i = 0; (i < MAX_HW_QUEUES) && (ccp->cmd_q_count < ccp->max_q_count); i++) { + if (!(qmr & (1 << i))) + continue; + + /* Allocate a dma pool for this queue */ + snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d", + ccp->name, i); + dma_pool = dma_pool_create(dma_pool_name, dev, + CCP_DMAPOOL_MAX_SIZE, + CCP_DMAPOOL_ALIGN, 0); + if (!dma_pool) { + dev_err(dev, "unable to allocate dma pool\n"); + ret = -ENOMEM; + goto e_pool; + } + + cmd_q = &ccp->cmd_q[ccp->cmd_q_count]; + ccp->cmd_q_count++; + + cmd_q->ccp = ccp; + cmd_q->id = i; + cmd_q->dma_pool = dma_pool; + + /* Reserve 2 KSB regions for the queue */ + cmd_q->sb_key = KSB_START + ccp->sb_start++; + cmd_q->sb_ctx = KSB_START + ccp->sb_start++; + ccp->sb_count -= 2; + + /* Preset some register values and masks that are queue + * number dependent + */ + cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE + + (CMD_Q_STATUS_INCR * i); + cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE + + (CMD_Q_STATUS_INCR * i); + cmd_q->int_ok = 1 << (i * 2); + cmd_q->int_err = 1 << ((i * 2) + 1); + + cmd_q->free_slots = ccp_get_free_slots(cmd_q); + + init_waitqueue_head(&cmd_q->int_queue); + + /* Build queue interrupt mask (two interrupts per queue) */ + ccp->qim |= cmd_q->int_ok | cmd_q->int_err; + +#ifdef CONFIG_ARM64 + /* For arm64 set the recommended queue cache settings */ + iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE + + (CMD_Q_CACHE_INC * i)); +#endif + + dev_dbg(dev, "queue #%u available\n", i); + } + if (ccp->cmd_q_count == 0) { + dev_notice(dev, "no command queues available\n"); + ret = -EIO; + goto e_pool; + } + dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count); + + /* Disable and clear interrupts until ready */ + ccp_disable_queue_interrupts(ccp); + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + ioread32(cmd_q->reg_int_status); + ioread32(cmd_q->reg_status); + } + iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG); + + /* Request an irq */ + ret = sp_request_ccp_irq(ccp->sp, ccp_irq_handler, ccp->name, ccp); + if (ret) { + dev_err(dev, "unable to allocate an IRQ\n"); + goto e_pool; + } + + /* Initialize the ISR tasklet? */ + if (ccp->use_tasklet) + tasklet_init(&ccp->irq_tasklet, ccp_irq_bh, + (unsigned long)ccp); + + dev_dbg(dev, "Starting threads...\n"); + /* Create a kthread for each queue */ + for (i = 0; i < ccp->cmd_q_count; i++) { + struct task_struct *kthread; + + cmd_q = &ccp->cmd_q[i]; + + kthread = kthread_create(ccp_cmd_queue_thread, cmd_q, + "%s-q%u", ccp->name, cmd_q->id); + if (IS_ERR(kthread)) { + dev_err(dev, "error creating queue thread (%ld)\n", + PTR_ERR(kthread)); + ret = PTR_ERR(kthread); + goto e_kthread; + } + + cmd_q->kthread = kthread; + wake_up_process(kthread); + } + + dev_dbg(dev, "Enabling interrupts...\n"); + /* Enable interrupts */ + ccp_enable_queue_interrupts(ccp); + + dev_dbg(dev, "Registering device...\n"); + ccp_add_device(ccp); + + ret = ccp_register_rng(ccp); + if (ret) + goto e_kthread; + + /* Register the DMA engine support */ + ret = ccp_dmaengine_register(ccp); + if (ret) + goto e_hwrng; + + return 0; + +e_hwrng: + ccp_unregister_rng(ccp); + +e_kthread: + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + + sp_free_ccp_irq(ccp->sp, ccp); + +e_pool: + for (i = 0; i < ccp->cmd_q_count; i++) + dma_pool_destroy(ccp->cmd_q[i].dma_pool); + + return ret; +} + +static void ccp_destroy(struct ccp_device *ccp) +{ + struct ccp_cmd_queue *cmd_q; + struct ccp_cmd *cmd; + unsigned int i; + + /* Unregister the DMA engine */ + ccp_dmaengine_unregister(ccp); + + /* Unregister the RNG */ + ccp_unregister_rng(ccp); + + /* Remove this device from the list of available units */ + ccp_del_device(ccp); + + /* Disable and clear interrupts */ + ccp_disable_queue_interrupts(ccp); + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + ioread32(cmd_q->reg_int_status); + ioread32(cmd_q->reg_status); + } + iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG); + + /* Stop the queue kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + + sp_free_ccp_irq(ccp->sp, ccp); + + for (i = 0; i < ccp->cmd_q_count; i++) + dma_pool_destroy(ccp->cmd_q[i].dma_pool); + + /* Flush the cmd and backlog queue */ + while (!list_empty(&ccp->cmd)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); + list_del(&cmd->entry); + cmd->callback(cmd->data, -ENODEV); + } + while (!list_empty(&ccp->backlog)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry); + list_del(&cmd->entry); + cmd->callback(cmd->data, -ENODEV); + } +} + +static const struct ccp_actions ccp3_actions = { + .aes = ccp_perform_aes, + .xts_aes = ccp_perform_xts_aes, + .des3 = NULL, + .sha = ccp_perform_sha, + .rsa = ccp_perform_rsa, + .passthru = ccp_perform_passthru, + .ecc = ccp_perform_ecc, + .sballoc = ccp_alloc_ksb, + .sbfree = ccp_free_ksb, + .init = ccp_init, + .destroy = ccp_destroy, + .get_free_slots = ccp_get_free_slots, + .irqhandler = ccp_irq_handler, +}; + +const struct ccp_vdata ccpv3_platform = { + .version = CCP_VERSION(3, 0), + .setup = NULL, + .perform = &ccp3_actions, + .offset = 0, + .rsamax = CCP_RSA_MAX_WIDTH, +}; + +const struct ccp_vdata ccpv3 = { + .version = CCP_VERSION(3, 0), + .setup = NULL, + .perform = &ccp3_actions, + .offset = 0x20000, + .rsamax = CCP_RSA_MAX_WIDTH, +}; diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c new file mode 100644 index 000000000..7838f63ba --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev-v5.c @@ -0,0 +1,1129 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2016,2019 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/dma-mapping.h> +#include <linux/interrupt.h> +#include <linux/compiler.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" + +/* Allocate the requested number of contiguous LSB slots + * from the LSB bitmap. Look in the private range for this + * queue first; failing that, check the public area. + * If no space is available, wait around. + * Return: first slot number + */ +static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count) +{ + struct ccp_device *ccp; + int start; + + /* First look at the map for the queue */ + if (cmd_q->lsb >= 0) { + start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap, + LSB_SIZE, + 0, count, 0); + if (start < LSB_SIZE) { + bitmap_set(cmd_q->lsbmap, start, count); + return start + cmd_q->lsb * LSB_SIZE; + } + } + + /* No joy; try to get an entry from the shared blocks */ + ccp = cmd_q->ccp; + for (;;) { + mutex_lock(&ccp->sb_mutex); + + start = (u32)bitmap_find_next_zero_area(ccp->lsbmap, + MAX_LSB_CNT * LSB_SIZE, + 0, + count, 0); + if (start <= MAX_LSB_CNT * LSB_SIZE) { + bitmap_set(ccp->lsbmap, start, count); + + mutex_unlock(&ccp->sb_mutex); + return start; + } + + ccp->sb_avail = 0; + + mutex_unlock(&ccp->sb_mutex); + + /* Wait for KSB entries to become available */ + if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail)) + return 0; + } +} + +/* Free a number of LSB slots from the bitmap, starting at + * the indicated starting slot number. + */ +static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start, + unsigned int count) +{ + if (!start) + return; + + if (cmd_q->lsb == start) { + /* An entry from the private LSB */ + bitmap_clear(cmd_q->lsbmap, start, count); + } else { + /* From the shared LSBs */ + struct ccp_device *ccp = cmd_q->ccp; + + mutex_lock(&ccp->sb_mutex); + bitmap_clear(ccp->lsbmap, start, count); + ccp->sb_avail = 1; + mutex_unlock(&ccp->sb_mutex); + wake_up_interruptible_all(&ccp->sb_queue); + } +} + +/* CCP version 5: Union to define the function field (cmd_reg1/dword0) */ +union ccp_function { + struct { + u16 size:7; + u16 encrypt:1; + u16 mode:5; + u16 type:2; + } aes; + struct { + u16 size:7; + u16 encrypt:1; + u16 rsvd:5; + u16 type:2; + } aes_xts; + struct { + u16 size:7; + u16 encrypt:1; + u16 mode:5; + u16 type:2; + } des3; + struct { + u16 rsvd1:10; + u16 type:4; + u16 rsvd2:1; + } sha; + struct { + u16 mode:3; + u16 size:12; + } rsa; + struct { + u16 byteswap:2; + u16 bitwise:3; + u16 reflect:2; + u16 rsvd:8; + } pt; + struct { + u16 rsvd:13; + } zlib; + struct { + u16 size:10; + u16 type:2; + u16 mode:3; + } ecc; + u16 raw; +}; + +#define CCP_AES_SIZE(p) ((p)->aes.size) +#define CCP_AES_ENCRYPT(p) ((p)->aes.encrypt) +#define CCP_AES_MODE(p) ((p)->aes.mode) +#define CCP_AES_TYPE(p) ((p)->aes.type) +#define CCP_XTS_SIZE(p) ((p)->aes_xts.size) +#define CCP_XTS_TYPE(p) ((p)->aes_xts.type) +#define CCP_XTS_ENCRYPT(p) ((p)->aes_xts.encrypt) +#define CCP_DES3_SIZE(p) ((p)->des3.size) +#define CCP_DES3_ENCRYPT(p) ((p)->des3.encrypt) +#define CCP_DES3_MODE(p) ((p)->des3.mode) +#define CCP_DES3_TYPE(p) ((p)->des3.type) +#define CCP_SHA_TYPE(p) ((p)->sha.type) +#define CCP_RSA_SIZE(p) ((p)->rsa.size) +#define CCP_PT_BYTESWAP(p) ((p)->pt.byteswap) +#define CCP_PT_BITWISE(p) ((p)->pt.bitwise) +#define CCP_ECC_MODE(p) ((p)->ecc.mode) +#define CCP_ECC_AFFINE(p) ((p)->ecc.one) + +/* Word 0 */ +#define CCP5_CMD_DW0(p) ((p)->dw0) +#define CCP5_CMD_SOC(p) (CCP5_CMD_DW0(p).soc) +#define CCP5_CMD_IOC(p) (CCP5_CMD_DW0(p).ioc) +#define CCP5_CMD_INIT(p) (CCP5_CMD_DW0(p).init) +#define CCP5_CMD_EOM(p) (CCP5_CMD_DW0(p).eom) +#define CCP5_CMD_FUNCTION(p) (CCP5_CMD_DW0(p).function) +#define CCP5_CMD_ENGINE(p) (CCP5_CMD_DW0(p).engine) +#define CCP5_CMD_PROT(p) (CCP5_CMD_DW0(p).prot) + +/* Word 1 */ +#define CCP5_CMD_DW1(p) ((p)->length) +#define CCP5_CMD_LEN(p) (CCP5_CMD_DW1(p)) + +/* Word 2 */ +#define CCP5_CMD_DW2(p) ((p)->src_lo) +#define CCP5_CMD_SRC_LO(p) (CCP5_CMD_DW2(p)) + +/* Word 3 */ +#define CCP5_CMD_DW3(p) ((p)->dw3) +#define CCP5_CMD_SRC_MEM(p) ((p)->dw3.src_mem) +#define CCP5_CMD_SRC_HI(p) ((p)->dw3.src_hi) +#define CCP5_CMD_LSB_ID(p) ((p)->dw3.lsb_cxt_id) +#define CCP5_CMD_FIX_SRC(p) ((p)->dw3.fixed) + +/* Words 4/5 */ +#define CCP5_CMD_DW4(p) ((p)->dw4) +#define CCP5_CMD_DST_LO(p) (CCP5_CMD_DW4(p).dst_lo) +#define CCP5_CMD_DW5(p) ((p)->dw5.fields.dst_hi) +#define CCP5_CMD_DST_HI(p) (CCP5_CMD_DW5(p)) +#define CCP5_CMD_DST_MEM(p) ((p)->dw5.fields.dst_mem) +#define CCP5_CMD_FIX_DST(p) ((p)->dw5.fields.fixed) +#define CCP5_CMD_SHA_LO(p) ((p)->dw4.sha_len_lo) +#define CCP5_CMD_SHA_HI(p) ((p)->dw5.sha_len_hi) + +/* Word 6/7 */ +#define CCP5_CMD_DW6(p) ((p)->key_lo) +#define CCP5_CMD_KEY_LO(p) (CCP5_CMD_DW6(p)) +#define CCP5_CMD_DW7(p) ((p)->dw7) +#define CCP5_CMD_KEY_HI(p) ((p)->dw7.key_hi) +#define CCP5_CMD_KEY_MEM(p) ((p)->dw7.key_mem) + +static inline u32 low_address(unsigned long addr) +{ + return (u64)addr & 0x0ffffffff; +} + +static inline u32 high_address(unsigned long addr) +{ + return ((u64)addr >> 32) & 0x00000ffff; +} + +static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q) +{ + unsigned int head_idx, n; + u32 head_lo, queue_start; + + queue_start = low_address(cmd_q->qdma_tail); + head_lo = ioread32(cmd_q->reg_head_lo); + head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc); + + n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1; + + return n % COMMANDS_PER_QUEUE; /* Always one unused spot */ +} + +static int ccp5_do_cmd(struct ccp5_desc *desc, + struct ccp_cmd_queue *cmd_q) +{ + __le32 *mP; + u32 *dP; + u32 tail; + int i; + int ret = 0; + + cmd_q->total_ops++; + + if (CCP5_CMD_SOC(desc)) { + CCP5_CMD_IOC(desc) = 1; + CCP5_CMD_SOC(desc) = 0; + } + mutex_lock(&cmd_q->q_mutex); + + mP = (__le32 *)&cmd_q->qbase[cmd_q->qidx]; + dP = (u32 *)desc; + for (i = 0; i < 8; i++) + mP[i] = cpu_to_le32(dP[i]); /* handle endianness */ + + cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE; + + /* The data used by this command must be flushed to memory */ + wmb(); + + /* Write the new tail address back to the queue register */ + tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE); + iowrite32(tail, cmd_q->reg_tail_lo); + + /* Turn the queue back on using our cached control register */ + iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control); + mutex_unlock(&cmd_q->q_mutex); + + if (CCP5_CMD_IOC(desc)) { + /* Wait for the job to complete */ + ret = wait_event_interruptible(cmd_q->int_queue, + cmd_q->int_rcvd); + if (ret || cmd_q->cmd_error) { + /* Log the error and flush the queue by + * moving the head pointer + */ + if (cmd_q->cmd_error) + ccp_log_error(cmd_q->ccp, + cmd_q->cmd_error); + iowrite32(tail, cmd_q->reg_head_lo); + if (!ret) + ret = -EIO; + } + cmd_q->int_rcvd = 0; + } + + return ret; +} + +static int ccp5_perform_aes(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + u32 key_addr = op->sb_key * LSB_ITEM_SIZE; + + op->cmd_q->total_aes_ops++; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = op->init; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_AES_ENCRYPT(&function) = op->u.aes.action; + CCP_AES_MODE(&function) = op->u.aes.mode; + CCP_AES_TYPE(&function) = op->u.aes.type; + CCP_AES_SIZE(&function) = op->u.aes.size; + + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_KEY_HI(&desc) = 0; + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB; + CCP5_CMD_LSB_ID(&desc) = op->sb_ctx; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_xts_aes(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + u32 key_addr = op->sb_key * LSB_ITEM_SIZE; + + op->cmd_q->total_xts_aes_ops++; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = op->init; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_XTS_TYPE(&function) = op->u.xts.type; + CCP_XTS_ENCRYPT(&function) = op->u.xts.action; + CCP_XTS_SIZE(&function) = op->u.xts.unit_size; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_KEY_HI(&desc) = 0; + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB; + CCP5_CMD_LSB_ID(&desc) = op->sb_ctx; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_sha(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + + op->cmd_q->total_sha_ops++; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = 1; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_SHA_TYPE(&function) = op->u.sha.type; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_LSB_ID(&desc) = op->sb_ctx; + + if (op->eom) { + CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits); + CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits); + } else { + CCP5_CMD_SHA_LO(&desc) = 0; + CCP5_CMD_SHA_HI(&desc) = 0; + } + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_des3(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + u32 key_addr = op->sb_key * LSB_ITEM_SIZE; + + op->cmd_q->total_3des_ops++; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, sizeof(struct ccp5_desc)); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_DES3; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = op->init; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_DES3_ENCRYPT(&function) = op->u.des3.action; + CCP_DES3_MODE(&function) = op->u.des3.mode; + CCP_DES3_TYPE(&function) = op->u.des3.type; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_KEY_HI(&desc) = 0; + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB; + CCP5_CMD_LSB_ID(&desc) = op->sb_ctx; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_rsa(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + + op->cmd_q->total_rsa_ops++; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA; + + CCP5_CMD_SOC(&desc) = op->soc; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = 0; + CCP5_CMD_EOM(&desc) = 1; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_RSA_SIZE(&function) = (op->u.rsa.mod_size + 7) >> 3; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->u.rsa.input_len; + + /* Source is from external memory */ + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + /* Destination is in external memory */ + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + /* Key (Exponent) is in external memory */ + CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(&op->exp.u.dma); + CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(&op->exp.u.dma); + CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_passthru(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + struct ccp_dma_info *saddr = &op->src.u.dma; + struct ccp_dma_info *daddr = &op->dst.u.dma; + + + op->cmd_q->total_pt_ops++; + + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU; + + CCP5_CMD_SOC(&desc) = 0; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = 0; + CCP5_CMD_EOM(&desc) = op->eom; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap; + CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + /* Length of source data is always 256 bytes */ + if (op->src.type == CCP_MEMTYPE_SYSTEM) + CCP5_CMD_LEN(&desc) = saddr->length; + else + CCP5_CMD_LEN(&desc) = daddr->length; + + if (op->src.type == CCP_MEMTYPE_SYSTEM) { + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP) + CCP5_CMD_LSB_ID(&desc) = op->sb_key; + } else { + u32 key_addr = op->src.u.sb * CCP_SB_BYTES; + + CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_SRC_HI(&desc) = 0; + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB; + } + + if (op->dst.type == CCP_MEMTYPE_SYSTEM) { + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + } else { + u32 key_addr = op->dst.u.sb * CCP_SB_BYTES; + + CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr); + CCP5_CMD_DST_HI(&desc) = 0; + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB; + } + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp5_perform_ecc(struct ccp_op *op) +{ + struct ccp5_desc desc; + union ccp_function function; + + op->cmd_q->total_ecc_ops++; + + /* Zero out all the fields of the command desc */ + memset(&desc, 0, Q_DESC_SIZE); + + CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC; + + CCP5_CMD_SOC(&desc) = 0; + CCP5_CMD_IOC(&desc) = 1; + CCP5_CMD_INIT(&desc) = 0; + CCP5_CMD_EOM(&desc) = 1; + CCP5_CMD_PROT(&desc) = 0; + + function.raw = 0; + function.ecc.mode = op->u.ecc.function; + CCP5_CMD_FUNCTION(&desc) = function.raw; + + CCP5_CMD_LEN(&desc) = op->src.u.dma.length; + + CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma); + CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma); + CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma); + CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma); + CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM; + + return ccp5_do_cmd(&desc, op->cmd_q); +} + +static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status) +{ + int q_mask = 1 << cmd_q->id; + int queues = 0; + int j; + + /* Build a bit mask to know which LSBs this queue has access to. + * Don't bother with segment 0 as it has special privileges. + */ + for (j = 1; j < MAX_LSB_CNT; j++) { + if (status & q_mask) + bitmap_set(cmd_q->lsbmask, j, 1); + status >>= LSB_REGION_WIDTH; + } + queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT); + dev_dbg(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n", + cmd_q->id, queues); + + return queues ? 0 : -EINVAL; +} + +static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp, + int lsb_cnt, int n_lsbs, + unsigned long *lsb_pub) +{ + DECLARE_BITMAP(qlsb, MAX_LSB_CNT); + int bitno; + int qlsb_wgt; + int i; + + /* For each queue: + * If the count of potential LSBs available to a queue matches the + * ordinal given to us in lsb_cnt: + * Copy the mask of possible LSBs for this queue into "qlsb"; + * For each bit in qlsb, see if the corresponding bit in the + * aggregation mask is set; if so, we have a match. + * If we have a match, clear the bit in the aggregation to + * mark it as no longer available. + * If there is no match, clear the bit in qlsb and keep looking. + */ + for (i = 0; i < ccp->cmd_q_count; i++) { + struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i]; + + qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT); + + if (qlsb_wgt == lsb_cnt) { + bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT); + + bitno = find_first_bit(qlsb, MAX_LSB_CNT); + while (bitno < MAX_LSB_CNT) { + if (test_bit(bitno, lsb_pub)) { + /* We found an available LSB + * that this queue can access + */ + cmd_q->lsb = bitno; + bitmap_clear(lsb_pub, bitno, 1); + dev_dbg(ccp->dev, + "Queue %d gets LSB %d\n", + i, bitno); + break; + } + bitmap_clear(qlsb, bitno, 1); + bitno = find_first_bit(qlsb, MAX_LSB_CNT); + } + if (bitno >= MAX_LSB_CNT) + return -EINVAL; + n_lsbs--; + } + } + return n_lsbs; +} + +/* For each queue, from the most- to least-constrained: + * find an LSB that can be assigned to the queue. If there are N queues that + * can only use M LSBs, where N > M, fail; otherwise, every queue will get a + * dedicated LSB. Remaining LSB regions become a shared resource. + * If we have fewer LSBs than queues, all LSB regions become shared resources. + */ +static int ccp_assign_lsbs(struct ccp_device *ccp) +{ + DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT); + DECLARE_BITMAP(qlsb, MAX_LSB_CNT); + int n_lsbs = 0; + int bitno; + int i, lsb_cnt; + int rc = 0; + + bitmap_zero(lsb_pub, MAX_LSB_CNT); + + /* Create an aggregate bitmap to get a total count of available LSBs */ + for (i = 0; i < ccp->cmd_q_count; i++) + bitmap_or(lsb_pub, + lsb_pub, ccp->cmd_q[i].lsbmask, + MAX_LSB_CNT); + + n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT); + + if (n_lsbs >= ccp->cmd_q_count) { + /* We have enough LSBS to give every queue a private LSB. + * Brute force search to start with the queues that are more + * constrained in LSB choice. When an LSB is privately + * assigned, it is removed from the public mask. + * This is an ugly N squared algorithm with some optimization. + */ + for (lsb_cnt = 1; + n_lsbs && (lsb_cnt <= MAX_LSB_CNT); + lsb_cnt++) { + rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs, + lsb_pub); + if (rc < 0) + return -EINVAL; + n_lsbs = rc; + } + } + + rc = 0; + /* What's left of the LSBs, according to the public mask, now become + * shared. Any zero bits in the lsb_pub mask represent an LSB region + * that can't be used as a shared resource, so mark the LSB slots for + * them as "in use". + */ + bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT); + + bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT); + while (bitno < MAX_LSB_CNT) { + bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE); + bitmap_set(qlsb, bitno, 1); + bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT); + } + + return rc; +} + +static void ccp5_disable_queue_interrupts(struct ccp_device *ccp) +{ + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) + iowrite32(0x0, ccp->cmd_q[i].reg_int_enable); +} + +static void ccp5_enable_queue_interrupts(struct ccp_device *ccp) +{ + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) + iowrite32(SUPPORTED_INTERRUPTS, ccp->cmd_q[i].reg_int_enable); +} + +static void ccp5_irq_bh(unsigned long data) +{ + struct ccp_device *ccp = (struct ccp_device *)data; + u32 status; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i]; + + status = ioread32(cmd_q->reg_interrupt_status); + + if (status) { + cmd_q->int_status = status; + cmd_q->q_status = ioread32(cmd_q->reg_status); + cmd_q->q_int_status = ioread32(cmd_q->reg_int_status); + + /* On error, only save the first error value */ + if ((status & INT_ERROR) && !cmd_q->cmd_error) + cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status); + + cmd_q->int_rcvd = 1; + + /* Acknowledge the interrupt and wake the kthread */ + iowrite32(status, cmd_q->reg_interrupt_status); + wake_up_interruptible(&cmd_q->int_queue); + } + } + ccp5_enable_queue_interrupts(ccp); +} + +static irqreturn_t ccp5_irq_handler(int irq, void *data) +{ + struct ccp_device *ccp = (struct ccp_device *)data; + + ccp5_disable_queue_interrupts(ccp); + ccp->total_interrupts++; + if (ccp->use_tasklet) + tasklet_schedule(&ccp->irq_tasklet); + else + ccp5_irq_bh((unsigned long)ccp); + return IRQ_HANDLED; +} + +static int ccp5_init(struct ccp_device *ccp) +{ + struct device *dev = ccp->dev; + struct ccp_cmd_queue *cmd_q; + struct dma_pool *dma_pool; + char dma_pool_name[MAX_DMAPOOL_NAME_LEN]; + unsigned int qmr, i; + u64 status; + u32 status_lo, status_hi; + int ret; + + /* Find available queues */ + qmr = ioread32(ccp->io_regs + Q_MASK_REG); + /* + * Check for a access to the registers. If this read returns + * 0xffffffff, it's likely that the system is running a broken + * BIOS which disallows access to the device. Stop here and fail + * the initialization (but not the load, as the PSP could get + * properly initialized). + */ + if (qmr == 0xffffffff) { + dev_notice(dev, "ccp: unable to access the device: you might be running a broken BIOS.\n"); + return 1; + } + + for (i = 0; (i < MAX_HW_QUEUES) && (ccp->cmd_q_count < ccp->max_q_count); i++) { + if (!(qmr & (1 << i))) + continue; + + /* Allocate a dma pool for this queue */ + snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d", + ccp->name, i); + dma_pool = dma_pool_create(dma_pool_name, dev, + CCP_DMAPOOL_MAX_SIZE, + CCP_DMAPOOL_ALIGN, 0); + if (!dma_pool) { + dev_err(dev, "unable to allocate dma pool\n"); + ret = -ENOMEM; + goto e_pool; + } + + cmd_q = &ccp->cmd_q[ccp->cmd_q_count]; + ccp->cmd_q_count++; + + cmd_q->ccp = ccp; + cmd_q->id = i; + cmd_q->dma_pool = dma_pool; + mutex_init(&cmd_q->q_mutex); + + /* Page alignment satisfies our needs for N <= 128 */ + BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128); + cmd_q->qsize = Q_SIZE(Q_DESC_SIZE); + cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize, + &cmd_q->qbase_dma, + GFP_KERNEL); + if (!cmd_q->qbase) { + dev_err(dev, "unable to allocate command queue\n"); + ret = -ENOMEM; + goto e_pool; + } + + cmd_q->qidx = 0; + /* Preset some register values and masks that are queue + * number dependent + */ + cmd_q->reg_control = ccp->io_regs + + CMD5_Q_STATUS_INCR * (i + 1); + cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE; + cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE; + cmd_q->reg_int_enable = cmd_q->reg_control + + CMD5_Q_INT_ENABLE_BASE; + cmd_q->reg_interrupt_status = cmd_q->reg_control + + CMD5_Q_INTERRUPT_STATUS_BASE; + cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE; + cmd_q->reg_int_status = cmd_q->reg_control + + CMD5_Q_INT_STATUS_BASE; + cmd_q->reg_dma_status = cmd_q->reg_control + + CMD5_Q_DMA_STATUS_BASE; + cmd_q->reg_dma_read_status = cmd_q->reg_control + + CMD5_Q_DMA_READ_STATUS_BASE; + cmd_q->reg_dma_write_status = cmd_q->reg_control + + CMD5_Q_DMA_WRITE_STATUS_BASE; + + init_waitqueue_head(&cmd_q->int_queue); + + dev_dbg(dev, "queue #%u available\n", i); + } + + if (ccp->cmd_q_count == 0) { + dev_notice(dev, "no command queues available\n"); + ret = 1; + goto e_pool; + } + + /* Turn off the queues and disable interrupts until ready */ + ccp5_disable_queue_interrupts(ccp); + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + cmd_q->qcontrol = 0; /* Start with nothing */ + iowrite32(cmd_q->qcontrol, cmd_q->reg_control); + + ioread32(cmd_q->reg_int_status); + ioread32(cmd_q->reg_status); + + /* Clear the interrupt status */ + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status); + } + + dev_dbg(dev, "Requesting an IRQ...\n"); + /* Request an irq */ + ret = sp_request_ccp_irq(ccp->sp, ccp5_irq_handler, ccp->name, ccp); + if (ret) { + dev_err(dev, "unable to allocate an IRQ\n"); + goto e_pool; + } + /* Initialize the ISR tasklet */ + if (ccp->use_tasklet) + tasklet_init(&ccp->irq_tasklet, ccp5_irq_bh, + (unsigned long)ccp); + + dev_dbg(dev, "Loading LSB map...\n"); + /* Copy the private LSB mask to the public registers */ + status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET); + status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET); + iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET); + iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET); + status = ((u64)status_hi<<30) | (u64)status_lo; + + dev_dbg(dev, "Configuring virtual queues...\n"); + /* Configure size of each virtual queue accessible to host */ + for (i = 0; i < ccp->cmd_q_count; i++) { + u32 dma_addr_lo; + u32 dma_addr_hi; + + cmd_q = &ccp->cmd_q[i]; + + cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT); + cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT; + + cmd_q->qdma_tail = cmd_q->qbase_dma; + dma_addr_lo = low_address(cmd_q->qdma_tail); + iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo); + iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo); + + dma_addr_hi = high_address(cmd_q->qdma_tail); + cmd_q->qcontrol |= (dma_addr_hi << 16); + iowrite32(cmd_q->qcontrol, cmd_q->reg_control); + + /* Find the LSB regions accessible to the queue */ + ccp_find_lsb_regions(cmd_q, status); + cmd_q->lsb = -1; /* Unassigned value */ + } + + dev_dbg(dev, "Assigning LSBs...\n"); + ret = ccp_assign_lsbs(ccp); + if (ret) { + dev_err(dev, "Unable to assign LSBs (%d)\n", ret); + goto e_irq; + } + + /* Optimization: pre-allocate LSB slots for each queue */ + for (i = 0; i < ccp->cmd_q_count; i++) { + ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2); + ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2); + } + + dev_dbg(dev, "Starting threads...\n"); + /* Create a kthread for each queue */ + for (i = 0; i < ccp->cmd_q_count; i++) { + struct task_struct *kthread; + + cmd_q = &ccp->cmd_q[i]; + + kthread = kthread_create(ccp_cmd_queue_thread, cmd_q, + "%s-q%u", ccp->name, cmd_q->id); + if (IS_ERR(kthread)) { + dev_err(dev, "error creating queue thread (%ld)\n", + PTR_ERR(kthread)); + ret = PTR_ERR(kthread); + goto e_kthread; + } + + cmd_q->kthread = kthread; + wake_up_process(kthread); + } + + dev_dbg(dev, "Enabling interrupts...\n"); + ccp5_enable_queue_interrupts(ccp); + + dev_dbg(dev, "Registering device...\n"); + /* Put this on the unit list to make it available */ + ccp_add_device(ccp); + + ret = ccp_register_rng(ccp); + if (ret) + goto e_kthread; + + /* Register the DMA engine support */ + ret = ccp_dmaengine_register(ccp); + if (ret) + goto e_hwrng; + +#ifdef CONFIG_CRYPTO_DEV_CCP_DEBUGFS + /* Set up debugfs entries */ + ccp5_debugfs_setup(ccp); +#endif + + return 0; + +e_hwrng: + ccp_unregister_rng(ccp); + +e_kthread: + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + +e_irq: + sp_free_ccp_irq(ccp->sp, ccp); + +e_pool: + for (i = 0; i < ccp->cmd_q_count; i++) + dma_pool_destroy(ccp->cmd_q[i].dma_pool); + + return ret; +} + +static void ccp5_destroy(struct ccp_device *ccp) +{ + struct ccp_cmd_queue *cmd_q; + struct ccp_cmd *cmd; + unsigned int i; + + /* Unregister the DMA engine */ + ccp_dmaengine_unregister(ccp); + + /* Unregister the RNG */ + ccp_unregister_rng(ccp); + + /* Remove this device from the list of available units first */ + ccp_del_device(ccp); + +#ifdef CONFIG_CRYPTO_DEV_CCP_DEBUGFS + /* We're in the process of tearing down the entire driver; + * when all the devices are gone clean up debugfs + */ + if (ccp_present()) + ccp5_debugfs_destroy(); +#endif + + /* Disable and clear interrupts */ + ccp5_disable_queue_interrupts(ccp); + for (i = 0; i < ccp->cmd_q_count; i++) { + cmd_q = &ccp->cmd_q[i]; + + /* Turn off the run bit */ + iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control); + + /* Clear the interrupt status */ + iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status); + ioread32(cmd_q->reg_int_status); + ioread32(cmd_q->reg_status); + } + + /* Stop the queue kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].kthread) + kthread_stop(ccp->cmd_q[i].kthread); + + sp_free_ccp_irq(ccp->sp, ccp); + + /* Flush the cmd and backlog queue */ + while (!list_empty(&ccp->cmd)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); + list_del(&cmd->entry); + cmd->callback(cmd->data, -ENODEV); + } + while (!list_empty(&ccp->backlog)) { + /* Invoke the callback directly with an error code */ + cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry); + list_del(&cmd->entry); + cmd->callback(cmd->data, -ENODEV); + } +} + +static void ccp5_config(struct ccp_device *ccp) +{ + /* Public side */ + iowrite32(0x0, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET); +} + +static void ccp5other_config(struct ccp_device *ccp) +{ + int i; + u32 rnd; + + /* We own all of the queues on the NTB CCP */ + + iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET); + iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET); + for (i = 0; i < 12; i++) { + rnd = ioread32(ccp->io_regs + TRNG_OUT_REG); + iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET); + } + + iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET); + iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET); + iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET); + + iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET); + iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET); + + iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET); + + ccp5_config(ccp); +} + +/* Version 5 adds some function, but is essentially the same as v5 */ +static const struct ccp_actions ccp5_actions = { + .aes = ccp5_perform_aes, + .xts_aes = ccp5_perform_xts_aes, + .sha = ccp5_perform_sha, + .des3 = ccp5_perform_des3, + .rsa = ccp5_perform_rsa, + .passthru = ccp5_perform_passthru, + .ecc = ccp5_perform_ecc, + .sballoc = ccp_lsb_alloc, + .sbfree = ccp_lsb_free, + .init = ccp5_init, + .destroy = ccp5_destroy, + .get_free_slots = ccp5_get_free_slots, +}; + +const struct ccp_vdata ccpv5a = { + .version = CCP_VERSION(5, 0), + .setup = ccp5_config, + .perform = &ccp5_actions, + .offset = 0x0, + .rsamax = CCP5_RSA_MAX_WIDTH, +}; + +const struct ccp_vdata ccpv5b = { + .version = CCP_VERSION(5, 0), + .dma_chan_attr = DMA_PRIVATE, + .setup = ccp5other_config, + .perform = &ccp5_actions, + .offset = 0x0, + .rsamax = CCP5_RSA_MAX_WIDTH, +}; diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c new file mode 100644 index 000000000..0971ee60f --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev.c @@ -0,0 +1,674 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013,2019 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/spinlock_types.h> +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/hw_random.h> +#include <linux/cpu.h> +#include <linux/atomic.h> +#ifdef CONFIG_X86 +#include <asm/cpu_device_id.h> +#endif +#include <linux/ccp.h> + +#include "ccp-dev.h" + +#define MAX_CCPS 32 + +/* Limit CCP use to a specifed number of queues per device */ +static unsigned int nqueues = 0; +module_param(nqueues, uint, 0444); +MODULE_PARM_DESC(nqueues, "Number of queues per CCP (minimum 1; default: all available)"); + +/* Limit the maximum number of configured CCPs */ +static atomic_t dev_count = ATOMIC_INIT(0); +static unsigned int max_devs = MAX_CCPS; +module_param(max_devs, uint, 0444); +MODULE_PARM_DESC(max_devs, "Maximum number of CCPs to enable (default: all; 0 disables all CCPs)"); + +struct ccp_tasklet_data { + struct completion completion; + struct ccp_cmd *cmd; +}; + +/* Human-readable error strings */ +#define CCP_MAX_ERROR_CODE 64 +static char *ccp_error_codes[] = { + "", + "ILLEGAL_ENGINE", + "ILLEGAL_KEY_ID", + "ILLEGAL_FUNCTION_TYPE", + "ILLEGAL_FUNCTION_MODE", + "ILLEGAL_FUNCTION_ENCRYPT", + "ILLEGAL_FUNCTION_SIZE", + "Zlib_MISSING_INIT_EOM", + "ILLEGAL_FUNCTION_RSVD", + "ILLEGAL_BUFFER_LENGTH", + "VLSB_FAULT", + "ILLEGAL_MEM_ADDR", + "ILLEGAL_MEM_SEL", + "ILLEGAL_CONTEXT_ID", + "ILLEGAL_KEY_ADDR", + "0xF Reserved", + "Zlib_ILLEGAL_MULTI_QUEUE", + "Zlib_ILLEGAL_JOBID_CHANGE", + "CMD_TIMEOUT", + "IDMA0_AXI_SLVERR", + "IDMA0_AXI_DECERR", + "0x15 Reserved", + "IDMA1_AXI_SLAVE_FAULT", + "IDMA1_AIXI_DECERR", + "0x18 Reserved", + "ZLIBVHB_AXI_SLVERR", + "ZLIBVHB_AXI_DECERR", + "0x1B Reserved", + "ZLIB_UNEXPECTED_EOM", + "ZLIB_EXTRA_DATA", + "ZLIB_BTYPE", + "ZLIB_UNDEFINED_SYMBOL", + "ZLIB_UNDEFINED_DISTANCE_S", + "ZLIB_CODE_LENGTH_SYMBOL", + "ZLIB _VHB_ILLEGAL_FETCH", + "ZLIB_UNCOMPRESSED_LEN", + "ZLIB_LIMIT_REACHED", + "ZLIB_CHECKSUM_MISMATCH0", + "ODMA0_AXI_SLVERR", + "ODMA0_AXI_DECERR", + "0x28 Reserved", + "ODMA1_AXI_SLVERR", + "ODMA1_AXI_DECERR", +}; + +void ccp_log_error(struct ccp_device *d, unsigned int e) +{ + if (WARN_ON(e >= CCP_MAX_ERROR_CODE)) + return; + + if (e < ARRAY_SIZE(ccp_error_codes)) + dev_err(d->dev, "CCP error %d: %s\n", e, ccp_error_codes[e]); + else + dev_err(d->dev, "CCP error %d: Unknown Error\n", e); +} + +/* List of CCPs, CCP count, read-write access lock, and access functions + * + * Lock structure: get ccp_unit_lock for reading whenever we need to + * examine the CCP list. While holding it for reading we can acquire + * the RR lock to update the round-robin next-CCP pointer. The unit lock + * must be acquired before the RR lock. + * + * If the unit-lock is acquired for writing, we have total control over + * the list, so there's no value in getting the RR lock. + */ +static DEFINE_RWLOCK(ccp_unit_lock); +static LIST_HEAD(ccp_units); + +/* Round-robin counter */ +static DEFINE_SPINLOCK(ccp_rr_lock); +static struct ccp_device *ccp_rr; + +/** + * ccp_add_device - add a CCP device to the list + * + * @ccp: ccp_device struct pointer + * + * Put this CCP on the unit list, which makes it available + * for use. + * + * Returns zero if a CCP device is present, -ENODEV otherwise. + */ +void ccp_add_device(struct ccp_device *ccp) +{ + unsigned long flags; + + write_lock_irqsave(&ccp_unit_lock, flags); + list_add_tail(&ccp->entry, &ccp_units); + if (!ccp_rr) + /* We already have the list lock (we're first) so this + * pointer can't change on us. Set its initial value. + */ + ccp_rr = ccp; + write_unlock_irqrestore(&ccp_unit_lock, flags); +} + +/** + * ccp_del_device - remove a CCP device from the list + * + * @ccp: ccp_device struct pointer + * + * Remove this unit from the list of devices. If the next device + * up for use is this one, adjust the pointer. If this is the last + * device, NULL the pointer. + */ +void ccp_del_device(struct ccp_device *ccp) +{ + unsigned long flags; + + write_lock_irqsave(&ccp_unit_lock, flags); + if (ccp_rr == ccp) { + /* ccp_unit_lock is read/write; any read access + * will be suspended while we make changes to the + * list and RR pointer. + */ + if (list_is_last(&ccp_rr->entry, &ccp_units)) + ccp_rr = list_first_entry(&ccp_units, struct ccp_device, + entry); + else + ccp_rr = list_next_entry(ccp_rr, entry); + } + list_del(&ccp->entry); + if (list_empty(&ccp_units)) + ccp_rr = NULL; + write_unlock_irqrestore(&ccp_unit_lock, flags); +} + + + +int ccp_register_rng(struct ccp_device *ccp) +{ + int ret = 0; + + dev_dbg(ccp->dev, "Registering RNG...\n"); + /* Register an RNG */ + ccp->hwrng.name = ccp->rngname; + ccp->hwrng.read = ccp_trng_read; + ret = hwrng_register(&ccp->hwrng); + if (ret) + dev_err(ccp->dev, "error registering hwrng (%d)\n", ret); + + return ret; +} + +void ccp_unregister_rng(struct ccp_device *ccp) +{ + if (ccp->hwrng.name) + hwrng_unregister(&ccp->hwrng); +} + +static struct ccp_device *ccp_get_device(void) +{ + unsigned long flags; + struct ccp_device *dp = NULL; + + /* We round-robin through the unit list. + * The (ccp_rr) pointer refers to the next unit to use. + */ + read_lock_irqsave(&ccp_unit_lock, flags); + if (!list_empty(&ccp_units)) { + spin_lock(&ccp_rr_lock); + dp = ccp_rr; + if (list_is_last(&ccp_rr->entry, &ccp_units)) + ccp_rr = list_first_entry(&ccp_units, struct ccp_device, + entry); + else + ccp_rr = list_next_entry(ccp_rr, entry); + spin_unlock(&ccp_rr_lock); + } + read_unlock_irqrestore(&ccp_unit_lock, flags); + + return dp; +} + +/** + * ccp_present - check if a CCP device is present + * + * Returns zero if a CCP device is present, -ENODEV otherwise. + */ +int ccp_present(void) +{ + unsigned long flags; + int ret; + + read_lock_irqsave(&ccp_unit_lock, flags); + ret = list_empty(&ccp_units); + read_unlock_irqrestore(&ccp_unit_lock, flags); + + return ret ? -ENODEV : 0; +} +EXPORT_SYMBOL_GPL(ccp_present); + +/** + * ccp_version - get the version of the CCP device + * + * Returns the version from the first unit on the list; + * otherwise a zero if no CCP device is present + */ +unsigned int ccp_version(void) +{ + struct ccp_device *dp; + unsigned long flags; + int ret = 0; + + read_lock_irqsave(&ccp_unit_lock, flags); + if (!list_empty(&ccp_units)) { + dp = list_first_entry(&ccp_units, struct ccp_device, entry); + ret = dp->vdata->version; + } + read_unlock_irqrestore(&ccp_unit_lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ccp_version); + +/** + * ccp_enqueue_cmd - queue an operation for processing by the CCP + * + * @cmd: ccp_cmd struct to be processed + * + * Queue a cmd to be processed by the CCP. If queueing the cmd + * would exceed the defined length of the cmd queue the cmd will + * only be queued if the CCP_CMD_MAY_BACKLOG flag is set and will + * result in a return code of -EBUSY. + * + * The callback routine specified in the ccp_cmd struct will be + * called to notify the caller of completion (if the cmd was not + * backlogged) or advancement out of the backlog. If the cmd has + * advanced out of the backlog the "err" value of the callback + * will be -EINPROGRESS. Any other "err" value during callback is + * the result of the operation. + * + * The cmd has been successfully queued if: + * the return code is -EINPROGRESS or + * the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set + */ +int ccp_enqueue_cmd(struct ccp_cmd *cmd) +{ + struct ccp_device *ccp; + unsigned long flags; + unsigned int i; + int ret; + + /* Some commands might need to be sent to a specific device */ + ccp = cmd->ccp ? cmd->ccp : ccp_get_device(); + + if (!ccp) + return -ENODEV; + + /* Caller must supply a callback routine */ + if (!cmd->callback) + return -EINVAL; + + cmd->ccp = ccp; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + i = ccp->cmd_q_count; + + if (ccp->cmd_count >= MAX_CMD_QLEN) { + if (cmd->flags & CCP_CMD_MAY_BACKLOG) { + ret = -EBUSY; + list_add_tail(&cmd->entry, &ccp->backlog); + } else { + ret = -ENOSPC; + } + } else { + ret = -EINPROGRESS; + ccp->cmd_count++; + list_add_tail(&cmd->entry, &ccp->cmd); + + /* Find an idle queue */ + if (!ccp->suspending) { + for (i = 0; i < ccp->cmd_q_count; i++) { + if (ccp->cmd_q[i].active) + continue; + + break; + } + } + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + /* If we found an idle queue, wake it up */ + if (i < ccp->cmd_q_count) + wake_up_process(ccp->cmd_q[i].kthread); + + return ret; +} +EXPORT_SYMBOL_GPL(ccp_enqueue_cmd); + +static void ccp_do_cmd_backlog(struct work_struct *work) +{ + struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work); + struct ccp_device *ccp = cmd->ccp; + unsigned long flags; + unsigned int i; + + cmd->callback(cmd->data, -EINPROGRESS); + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->cmd_count++; + list_add_tail(&cmd->entry, &ccp->cmd); + + /* Find an idle queue */ + for (i = 0; i < ccp->cmd_q_count; i++) { + if (ccp->cmd_q[i].active) + continue; + + break; + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + /* If we found an idle queue, wake it up */ + if (i < ccp->cmd_q_count) + wake_up_process(ccp->cmd_q[i].kthread); +} + +static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q) +{ + struct ccp_device *ccp = cmd_q->ccp; + struct ccp_cmd *cmd = NULL; + struct ccp_cmd *backlog = NULL; + unsigned long flags; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + cmd_q->active = 0; + + if (ccp->suspending) { + cmd_q->suspended = 1; + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + wake_up_interruptible(&ccp->suspend_queue); + + return NULL; + } + + if (ccp->cmd_count) { + cmd_q->active = 1; + + cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry); + list_del(&cmd->entry); + + ccp->cmd_count--; + } + + if (!list_empty(&ccp->backlog)) { + backlog = list_first_entry(&ccp->backlog, struct ccp_cmd, + entry); + list_del(&backlog->entry); + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + if (backlog) { + INIT_WORK(&backlog->work, ccp_do_cmd_backlog); + schedule_work(&backlog->work); + } + + return cmd; +} + +static void ccp_do_cmd_complete(unsigned long data) +{ + struct ccp_tasklet_data *tdata = (struct ccp_tasklet_data *)data; + struct ccp_cmd *cmd = tdata->cmd; + + cmd->callback(cmd->data, cmd->ret); + + complete(&tdata->completion); +} + +/** + * ccp_cmd_queue_thread - create a kernel thread to manage a CCP queue + * + * @data: thread-specific data + */ +int ccp_cmd_queue_thread(void *data) +{ + struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data; + struct ccp_cmd *cmd; + struct ccp_tasklet_data tdata; + struct tasklet_struct tasklet; + + tasklet_init(&tasklet, ccp_do_cmd_complete, (unsigned long)&tdata); + + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + schedule(); + + set_current_state(TASK_INTERRUPTIBLE); + + cmd = ccp_dequeue_cmd(cmd_q); + if (!cmd) + continue; + + __set_current_state(TASK_RUNNING); + + /* Execute the command */ + cmd->ret = ccp_run_cmd(cmd_q, cmd); + + /* Schedule the completion callback */ + tdata.cmd = cmd; + init_completion(&tdata.completion); + tasklet_schedule(&tasklet); + wait_for_completion(&tdata.completion); + } + + __set_current_state(TASK_RUNNING); + + return 0; +} + +/** + * ccp_alloc_struct - allocate and initialize the ccp_device struct + * + * @dev: device struct of the CCP + */ +struct ccp_device *ccp_alloc_struct(struct sp_device *sp) +{ + struct device *dev = sp->dev; + struct ccp_device *ccp; + + ccp = devm_kzalloc(dev, sizeof(*ccp), GFP_KERNEL); + if (!ccp) + return NULL; + ccp->dev = dev; + ccp->sp = sp; + ccp->axcache = sp->axcache; + + INIT_LIST_HEAD(&ccp->cmd); + INIT_LIST_HEAD(&ccp->backlog); + + spin_lock_init(&ccp->cmd_lock); + mutex_init(&ccp->req_mutex); + mutex_init(&ccp->sb_mutex); + ccp->sb_count = KSB_COUNT; + ccp->sb_start = 0; + + /* Initialize the wait queues */ + init_waitqueue_head(&ccp->sb_queue); + init_waitqueue_head(&ccp->suspend_queue); + + snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", sp->ord); + snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", sp->ord); + + return ccp; +} + +int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait) +{ + struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng); + u32 trng_value; + int len = min_t(int, sizeof(trng_value), max); + + /* Locking is provided by the caller so we can update device + * hwrng-related fields safely + */ + trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG); + if (!trng_value) { + /* Zero is returned if not data is available or if a + * bad-entropy error is present. Assume an error if + * we exceed TRNG_RETRIES reads of zero. + */ + if (ccp->hwrng_retries++ > TRNG_RETRIES) + return -EIO; + + return 0; + } + + /* Reset the counter and save the rng value */ + ccp->hwrng_retries = 0; + memcpy(data, &trng_value, len); + + return len; +} + +bool ccp_queues_suspended(struct ccp_device *ccp) +{ + unsigned int suspended = 0; + unsigned long flags; + unsigned int i; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + for (i = 0; i < ccp->cmd_q_count; i++) + if (ccp->cmd_q[i].suspended) + suspended++; + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + return ccp->cmd_q_count == suspended; +} + +int ccp_dev_suspend(struct sp_device *sp) +{ + struct ccp_device *ccp = sp->ccp_data; + unsigned long flags; + unsigned int i; + + /* If there's no device there's nothing to do */ + if (!ccp) + return 0; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->suspending = 1; + + /* Wake all the queue kthreads to prepare for suspend */ + for (i = 0; i < ccp->cmd_q_count; i++) + wake_up_process(ccp->cmd_q[i].kthread); + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + /* Wait for all queue kthreads to say they're done */ + while (!ccp_queues_suspended(ccp)) + wait_event_interruptible(ccp->suspend_queue, + ccp_queues_suspended(ccp)); + + return 0; +} + +int ccp_dev_resume(struct sp_device *sp) +{ + struct ccp_device *ccp = sp->ccp_data; + unsigned long flags; + unsigned int i; + + /* If there's no device there's nothing to do */ + if (!ccp) + return 0; + + spin_lock_irqsave(&ccp->cmd_lock, flags); + + ccp->suspending = 0; + + /* Wake up all the kthreads */ + for (i = 0; i < ccp->cmd_q_count; i++) { + ccp->cmd_q[i].suspended = 0; + wake_up_process(ccp->cmd_q[i].kthread); + } + + spin_unlock_irqrestore(&ccp->cmd_lock, flags); + + return 0; +} + +int ccp_dev_init(struct sp_device *sp) +{ + struct device *dev = sp->dev; + struct ccp_device *ccp; + int ret; + + /* + * Check how many we have so far, and stop after reaching + * that number + */ + if (atomic_inc_return(&dev_count) > max_devs) + return 0; /* don't fail the load */ + + ret = -ENOMEM; + ccp = ccp_alloc_struct(sp); + if (!ccp) + goto e_err; + sp->ccp_data = ccp; + + if (!nqueues || (nqueues > MAX_HW_QUEUES)) + ccp->max_q_count = MAX_HW_QUEUES; + else + ccp->max_q_count = nqueues; + + ccp->vdata = (struct ccp_vdata *)sp->dev_vdata->ccp_vdata; + if (!ccp->vdata || !ccp->vdata->version) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } + + ccp->use_tasklet = sp->use_tasklet; + + ccp->io_regs = sp->io_map + ccp->vdata->offset; + if (ccp->vdata->setup) + ccp->vdata->setup(ccp); + + ret = ccp->vdata->perform->init(ccp); + if (ret) { + /* A positive number means that the device cannot be initialized, + * but no additional message is required. + */ + if (ret > 0) + goto e_quiet; + + /* An unexpected problem occurred, and should be reported in the log */ + goto e_err; + } + + dev_notice(dev, "ccp enabled\n"); + + return 0; + +e_err: + dev_notice(dev, "ccp initialization failed\n"); + +e_quiet: + sp->ccp_data = NULL; + + return ret; +} + +void ccp_dev_destroy(struct sp_device *sp) +{ + struct ccp_device *ccp = sp->ccp_data; + + if (!ccp) + return; + + ccp->vdata->perform->destroy(ccp); +} diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h new file mode 100644 index 000000000..a5d9123a2 --- /dev/null +++ b/drivers/crypto/ccp/ccp-dev.h @@ -0,0 +1,673 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013,2017 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#ifndef __CCP_DEV_H__ +#define __CCP_DEV_H__ + +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/dma-direction.h> +#include <linux/dmapool.h> +#include <linux/hw_random.h> +#include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/irqreturn.h> +#include <linux/dmaengine.h> + +#include "sp-dev.h" + +#define MAX_CCP_NAME_LEN 16 +#define MAX_DMAPOOL_NAME_LEN 32 + +#define MAX_HW_QUEUES 5 +#define MAX_CMD_QLEN 100 + +#define TRNG_RETRIES 10 + +#define CACHE_NONE 0x00 +#define CACHE_WB_NO_ALLOC 0xb7 + +/****** Register Mappings ******/ +#define Q_MASK_REG 0x000 +#define TRNG_OUT_REG 0x00c +#define IRQ_MASK_REG 0x040 +#define IRQ_STATUS_REG 0x200 + +#define DEL_CMD_Q_JOB 0x124 +#define DEL_Q_ACTIVE 0x00000200 +#define DEL_Q_ID_SHIFT 6 + +#define CMD_REQ0 0x180 +#define CMD_REQ_INCR 0x04 + +#define CMD_Q_STATUS_BASE 0x210 +#define CMD_Q_INT_STATUS_BASE 0x214 +#define CMD_Q_STATUS_INCR 0x20 + +#define CMD_Q_CACHE_BASE 0x228 +#define CMD_Q_CACHE_INC 0x20 + +#define CMD_Q_ERROR(__qs) ((__qs) & 0x0000003f) +#define CMD_Q_DEPTH(__qs) (((__qs) >> 12) & 0x0000000f) + +/* ------------------------ CCP Version 5 Specifics ------------------------ */ +#define CMD5_QUEUE_MASK_OFFSET 0x00 +#define CMD5_QUEUE_PRIO_OFFSET 0x04 +#define CMD5_REQID_CONFIG_OFFSET 0x08 +#define CMD5_CMD_TIMEOUT_OFFSET 0x10 +#define LSB_PUBLIC_MASK_LO_OFFSET 0x18 +#define LSB_PUBLIC_MASK_HI_OFFSET 0x1C +#define LSB_PRIVATE_MASK_LO_OFFSET 0x20 +#define LSB_PRIVATE_MASK_HI_OFFSET 0x24 +#define CMD5_PSP_CCP_VERSION 0x100 + +#define CMD5_Q_CONTROL_BASE 0x0000 +#define CMD5_Q_TAIL_LO_BASE 0x0004 +#define CMD5_Q_HEAD_LO_BASE 0x0008 +#define CMD5_Q_INT_ENABLE_BASE 0x000C +#define CMD5_Q_INTERRUPT_STATUS_BASE 0x0010 + +#define CMD5_Q_STATUS_BASE 0x0100 +#define CMD5_Q_INT_STATUS_BASE 0x0104 +#define CMD5_Q_DMA_STATUS_BASE 0x0108 +#define CMD5_Q_DMA_READ_STATUS_BASE 0x010C +#define CMD5_Q_DMA_WRITE_STATUS_BASE 0x0110 +#define CMD5_Q_ABORT_BASE 0x0114 +#define CMD5_Q_AX_CACHE_BASE 0x0118 + +#define CMD5_CONFIG_0_OFFSET 0x6000 +#define CMD5_TRNG_CTL_OFFSET 0x6008 +#define CMD5_AES_MASK_OFFSET 0x6010 +#define CMD5_CLK_GATE_CTL_OFFSET 0x603C + +/* Address offset between two virtual queue registers */ +#define CMD5_Q_STATUS_INCR 0x1000 + +/* Bit masks */ +#define CMD5_Q_RUN 0x1 +#define CMD5_Q_HALT 0x2 +#define CMD5_Q_MEM_LOCATION 0x4 +#define CMD5_Q_SIZE 0x1F +#define CMD5_Q_SHIFT 3 +#define COMMANDS_PER_QUEUE 16 +#define QUEUE_SIZE_VAL ((ffs(COMMANDS_PER_QUEUE) - 2) & \ + CMD5_Q_SIZE) +#define Q_PTR_MASK (2 << (QUEUE_SIZE_VAL + 5) - 1) +#define Q_DESC_SIZE sizeof(struct ccp5_desc) +#define Q_SIZE(n) (COMMANDS_PER_QUEUE*(n)) + +#define INT_COMPLETION 0x1 +#define INT_ERROR 0x2 +#define INT_QUEUE_STOPPED 0x4 +#define INT_EMPTY_QUEUE 0x8 +#define SUPPORTED_INTERRUPTS (INT_COMPLETION | INT_ERROR) + +#define LSB_REGION_WIDTH 5 +#define MAX_LSB_CNT 8 + +#define LSB_SIZE 16 +#define LSB_ITEM_SIZE 32 +#define PLSB_MAP_SIZE (LSB_SIZE) +#define SLSB_MAP_SIZE (MAX_LSB_CNT * LSB_SIZE) + +#define LSB_ENTRY_NUMBER(LSB_ADDR) (LSB_ADDR / LSB_ITEM_SIZE) + +/* ------------------------ CCP Version 3 Specifics ------------------------ */ +#define REQ0_WAIT_FOR_WRITE 0x00000004 +#define REQ0_INT_ON_COMPLETE 0x00000002 +#define REQ0_STOP_ON_COMPLETE 0x00000001 + +#define REQ0_CMD_Q_SHIFT 9 +#define REQ0_JOBID_SHIFT 3 + +/****** REQ1 Related Values ******/ +#define REQ1_PROTECT_SHIFT 27 +#define REQ1_ENGINE_SHIFT 23 +#define REQ1_KEY_KSB_SHIFT 2 + +#define REQ1_EOM 0x00000002 +#define REQ1_INIT 0x00000001 + +/* AES Related Values */ +#define REQ1_AES_TYPE_SHIFT 21 +#define REQ1_AES_MODE_SHIFT 18 +#define REQ1_AES_ACTION_SHIFT 17 +#define REQ1_AES_CFB_SIZE_SHIFT 10 + +/* XTS-AES Related Values */ +#define REQ1_XTS_AES_SIZE_SHIFT 10 + +/* SHA Related Values */ +#define REQ1_SHA_TYPE_SHIFT 21 + +/* RSA Related Values */ +#define REQ1_RSA_MOD_SIZE_SHIFT 10 + +/* Pass-Through Related Values */ +#define REQ1_PT_BW_SHIFT 12 +#define REQ1_PT_BS_SHIFT 10 + +/* ECC Related Values */ +#define REQ1_ECC_AFFINE_CONVERT 0x00200000 +#define REQ1_ECC_FUNCTION_SHIFT 18 + +/****** REQ4 Related Values ******/ +#define REQ4_KSB_SHIFT 18 +#define REQ4_MEMTYPE_SHIFT 16 + +/****** REQ6 Related Values ******/ +#define REQ6_MEMTYPE_SHIFT 16 + +/****** Key Storage Block ******/ +#define KSB_START 77 +#define KSB_END 127 +#define KSB_COUNT (KSB_END - KSB_START + 1) +#define CCP_SB_BITS 256 + +#define CCP_JOBID_MASK 0x0000003f + +/* ------------------------ General CCP Defines ------------------------ */ + +#define CCP_DMA_DFLT 0x0 +#define CCP_DMA_PRIV 0x1 +#define CCP_DMA_PUB 0x2 + +#define CCP_DMAPOOL_MAX_SIZE 64 +#define CCP_DMAPOOL_ALIGN BIT(5) + +#define CCP_REVERSE_BUF_SIZE 64 + +#define CCP_AES_KEY_SB_COUNT 1 +#define CCP_AES_CTX_SB_COUNT 1 + +#define CCP_XTS_AES_KEY_SB_COUNT 1 +#define CCP5_XTS_AES_KEY_SB_COUNT 2 +#define CCP_XTS_AES_CTX_SB_COUNT 1 + +#define CCP_DES3_KEY_SB_COUNT 1 +#define CCP_DES3_CTX_SB_COUNT 1 + +#define CCP_SHA_SB_COUNT 1 + +#define CCP_RSA_MAX_WIDTH 4096 +#define CCP5_RSA_MAX_WIDTH 16384 + +#define CCP_PASSTHRU_BLOCKSIZE 256 +#define CCP_PASSTHRU_MASKSIZE 32 +#define CCP_PASSTHRU_SB_COUNT 1 + +#define CCP_ECC_MODULUS_BYTES 48 /* 384-bits */ +#define CCP_ECC_MAX_OPERANDS 6 +#define CCP_ECC_MAX_OUTPUTS 3 +#define CCP_ECC_SRC_BUF_SIZE 448 +#define CCP_ECC_DST_BUF_SIZE 192 +#define CCP_ECC_OPERAND_SIZE 64 +#define CCP_ECC_OUTPUT_SIZE 64 +#define CCP_ECC_RESULT_OFFSET 60 +#define CCP_ECC_RESULT_SUCCESS 0x0001 + +#define CCP_SB_BYTES 32 + +struct ccp_op; +struct ccp_device; +struct ccp_cmd; +struct ccp_fns; + +struct ccp_dma_cmd { + struct list_head entry; + + struct ccp_cmd ccp_cmd; +}; + +struct ccp_dma_desc { + struct list_head entry; + + struct ccp_device *ccp; + + struct list_head pending; + struct list_head active; + + enum dma_status status; + struct dma_async_tx_descriptor tx_desc; + size_t len; +}; + +struct ccp_dma_chan { + struct ccp_device *ccp; + + spinlock_t lock; + struct list_head created; + struct list_head pending; + struct list_head active; + struct list_head complete; + + struct tasklet_struct cleanup_tasklet; + + enum dma_status status; + struct dma_chan dma_chan; +}; + +struct ccp_cmd_queue { + struct ccp_device *ccp; + + /* Queue identifier */ + u32 id; + + /* Queue dma pool */ + struct dma_pool *dma_pool; + + /* Queue base address (not neccessarily aligned)*/ + struct ccp5_desc *qbase; + + /* Aligned queue start address (per requirement) */ + struct mutex q_mutex ____cacheline_aligned; + unsigned int qidx; + + /* Version 5 has different requirements for queue memory */ + unsigned int qsize; + dma_addr_t qbase_dma; + dma_addr_t qdma_tail; + + /* Per-queue reserved storage block(s) */ + u32 sb_key; + u32 sb_ctx; + + /* Bitmap of LSBs that can be accessed by this queue */ + DECLARE_BITMAP(lsbmask, MAX_LSB_CNT); + /* Private LSB that is assigned to this queue, or -1 if none. + * Bitmap for my private LSB, unused otherwise + */ + int lsb; + DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE); + + /* Queue processing thread */ + struct task_struct *kthread; + unsigned int active; + unsigned int suspended; + + /* Number of free command slots available */ + unsigned int free_slots; + + /* Interrupt masks */ + u32 int_ok; + u32 int_err; + + /* Register addresses for queue */ + void __iomem *reg_control; + void __iomem *reg_tail_lo; + void __iomem *reg_head_lo; + void __iomem *reg_int_enable; + void __iomem *reg_interrupt_status; + void __iomem *reg_status; + void __iomem *reg_int_status; + void __iomem *reg_dma_status; + void __iomem *reg_dma_read_status; + void __iomem *reg_dma_write_status; + u32 qcontrol; /* Cached control register */ + + /* Status values from job */ + u32 int_status; + u32 q_status; + u32 q_int_status; + u32 cmd_error; + + /* Interrupt wait queue */ + wait_queue_head_t int_queue; + unsigned int int_rcvd; + + /* Per-queue Statistics */ + unsigned long total_ops; + unsigned long total_aes_ops; + unsigned long total_xts_aes_ops; + unsigned long total_3des_ops; + unsigned long total_sha_ops; + unsigned long total_rsa_ops; + unsigned long total_pt_ops; + unsigned long total_ecc_ops; +} ____cacheline_aligned; + +struct ccp_device { + struct list_head entry; + + struct ccp_vdata *vdata; + unsigned int ord; + char name[MAX_CCP_NAME_LEN]; + char rngname[MAX_CCP_NAME_LEN]; + + struct device *dev; + struct sp_device *sp; + + /* Bus specific device information + */ + void *dev_specific; + unsigned int qim; + unsigned int irq; + bool use_tasklet; + struct tasklet_struct irq_tasklet; + + /* I/O area used for device communication. The register mapping + * starts at an offset into the mapped bar. + * The CMD_REQx registers and the Delete_Cmd_Queue_Job register + * need to be protected while a command queue thread is accessing + * them. + */ + struct mutex req_mutex ____cacheline_aligned; + void __iomem *io_regs; + + /* Master lists that all cmds are queued on. Because there can be + * more than one CCP command queue that can process a cmd a separate + * backlog list is neeeded so that the backlog completion call + * completes before the cmd is available for execution. + */ + spinlock_t cmd_lock ____cacheline_aligned; + unsigned int cmd_count; + struct list_head cmd; + struct list_head backlog; + + /* The command queues. These represent the queues available on the + * CCP that are available for processing cmds + */ + struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES]; + unsigned int cmd_q_count; + unsigned int max_q_count; + + /* Support for the CCP True RNG + */ + struct hwrng hwrng; + unsigned int hwrng_retries; + + /* Support for the CCP DMA capabilities + */ + struct dma_device dma_dev; + struct ccp_dma_chan *ccp_dma_chan; + struct kmem_cache *dma_cmd_cache; + struct kmem_cache *dma_desc_cache; + + /* A counter used to generate job-ids for cmds submitted to the CCP + */ + atomic_t current_id ____cacheline_aligned; + + /* The v3 CCP uses key storage blocks (SB) to maintain context for + * certain operations. To prevent multiple cmds from using the same + * SB range a command queue reserves an SB range for the duration of + * the cmd. Each queue, will however, reserve 2 SB blocks for + * operations that only require single SB entries (eg. AES context/iv + * and key) in order to avoid allocation contention. This will reserve + * at most 10 SB entries, leaving 40 SB entries available for dynamic + * allocation. + * + * The v5 CCP Local Storage Block (LSB) is broken up into 8 + * memrory ranges, each of which can be enabled for access by one + * or more queues. Device initialization takes this into account, + * and attempts to assign one region for exclusive use by each + * available queue; the rest are then aggregated as "public" use. + * If there are fewer regions than queues, all regions are shared + * amongst all queues. + */ + struct mutex sb_mutex ____cacheline_aligned; + DECLARE_BITMAP(sb, KSB_COUNT); + wait_queue_head_t sb_queue; + unsigned int sb_avail; + unsigned int sb_count; + u32 sb_start; + + /* Bitmap of shared LSBs, if any */ + DECLARE_BITMAP(lsbmap, SLSB_MAP_SIZE); + + /* Suspend support */ + unsigned int suspending; + wait_queue_head_t suspend_queue; + + /* DMA caching attribute support */ + unsigned int axcache; + + /* Device Statistics */ + unsigned long total_interrupts; + + /* DebugFS info */ + struct dentry *debugfs_instance; +}; + +enum ccp_memtype { + CCP_MEMTYPE_SYSTEM = 0, + CCP_MEMTYPE_SB, + CCP_MEMTYPE_LOCAL, + CCP_MEMTYPE__LAST, +}; +#define CCP_MEMTYPE_LSB CCP_MEMTYPE_KSB + + +struct ccp_dma_info { + dma_addr_t address; + unsigned int offset; + unsigned int length; + enum dma_data_direction dir; +} __packed __aligned(4); + +struct ccp_dm_workarea { + struct device *dev; + struct dma_pool *dma_pool; + + u8 *address; + struct ccp_dma_info dma; + unsigned int length; +}; + +struct ccp_sg_workarea { + struct scatterlist *sg; + int nents; + unsigned int sg_used; + + struct scatterlist *dma_sg; + struct scatterlist *dma_sg_head; + struct device *dma_dev; + unsigned int dma_count; + enum dma_data_direction dma_dir; + + u64 bytes_left; +}; + +struct ccp_data { + struct ccp_sg_workarea sg_wa; + struct ccp_dm_workarea dm_wa; +}; + +struct ccp_mem { + enum ccp_memtype type; + union { + struct ccp_dma_info dma; + u32 sb; + } u; +}; + +struct ccp_aes_op { + enum ccp_aes_type type; + enum ccp_aes_mode mode; + enum ccp_aes_action action; + unsigned int size; +}; + +struct ccp_xts_aes_op { + enum ccp_aes_type type; + enum ccp_aes_action action; + enum ccp_xts_aes_unit_size unit_size; +}; + +struct ccp_des3_op { + enum ccp_des3_type type; + enum ccp_des3_mode mode; + enum ccp_des3_action action; +}; + +struct ccp_sha_op { + enum ccp_sha_type type; + u64 msg_bits; +}; + +struct ccp_rsa_op { + u32 mod_size; + u32 input_len; +}; + +struct ccp_passthru_op { + enum ccp_passthru_bitwise bit_mod; + enum ccp_passthru_byteswap byte_swap; +}; + +struct ccp_ecc_op { + enum ccp_ecc_function function; +}; + +struct ccp_op { + struct ccp_cmd_queue *cmd_q; + + u32 jobid; + u32 ioc; + u32 soc; + u32 sb_key; + u32 sb_ctx; + u32 init; + u32 eom; + + struct ccp_mem src; + struct ccp_mem dst; + struct ccp_mem exp; + + union { + struct ccp_aes_op aes; + struct ccp_xts_aes_op xts; + struct ccp_des3_op des3; + struct ccp_sha_op sha; + struct ccp_rsa_op rsa; + struct ccp_passthru_op passthru; + struct ccp_ecc_op ecc; + } u; +}; + +static inline u32 ccp_addr_lo(struct ccp_dma_info *info) +{ + return lower_32_bits(info->address + info->offset); +} + +static inline u32 ccp_addr_hi(struct ccp_dma_info *info) +{ + return upper_32_bits(info->address + info->offset) & 0x0000ffff; +} + +/** + * descriptor for version 5 CPP commands + * 8 32-bit words: + * word 0: function; engine; control bits + * word 1: length of source data + * word 2: low 32 bits of source pointer + * word 3: upper 16 bits of source pointer; source memory type + * word 4: low 32 bits of destination pointer + * word 5: upper 16 bits of destination pointer; destination memory type + * word 6: low 32 bits of key pointer + * word 7: upper 16 bits of key pointer; key memory type + */ +struct dword0 { + unsigned int soc:1; + unsigned int ioc:1; + unsigned int rsvd1:1; + unsigned int init:1; + unsigned int eom:1; /* AES/SHA only */ + unsigned int function:15; + unsigned int engine:4; + unsigned int prot:1; + unsigned int rsvd2:7; +}; + +struct dword3 { + unsigned int src_hi:16; + unsigned int src_mem:2; + unsigned int lsb_cxt_id:8; + unsigned int rsvd1:5; + unsigned int fixed:1; +}; + +union dword4 { + u32 dst_lo; /* NON-SHA */ + u32 sha_len_lo; /* SHA */ +}; + +union dword5 { + struct { + unsigned int dst_hi:16; + unsigned int dst_mem:2; + unsigned int rsvd1:13; + unsigned int fixed:1; + } fields; + u32 sha_len_hi; +}; + +struct dword7 { + unsigned int key_hi:16; + unsigned int key_mem:2; + unsigned int rsvd1:14; +}; + +struct ccp5_desc { + struct dword0 dw0; + u32 length; + u32 src_lo; + struct dword3 dw3; + union dword4 dw4; + union dword5 dw5; + u32 key_lo; + struct dword7 dw7; +}; + +void ccp_add_device(struct ccp_device *ccp); +void ccp_del_device(struct ccp_device *ccp); + +extern void ccp_log_error(struct ccp_device *, unsigned int); + +struct ccp_device *ccp_alloc_struct(struct sp_device *sp); +bool ccp_queues_suspended(struct ccp_device *ccp); +int ccp_cmd_queue_thread(void *data); +int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait); + +int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd); + +int ccp_register_rng(struct ccp_device *ccp); +void ccp_unregister_rng(struct ccp_device *ccp); +int ccp_dmaengine_register(struct ccp_device *ccp); +void ccp_dmaengine_unregister(struct ccp_device *ccp); + +void ccp5_debugfs_setup(struct ccp_device *ccp); +void ccp5_debugfs_destroy(void); + +/* Structure for computation functions that are device-specific */ +struct ccp_actions { + int (*aes)(struct ccp_op *); + int (*xts_aes)(struct ccp_op *); + int (*des3)(struct ccp_op *); + int (*sha)(struct ccp_op *); + int (*rsa)(struct ccp_op *); + int (*passthru)(struct ccp_op *); + int (*ecc)(struct ccp_op *); + u32 (*sballoc)(struct ccp_cmd_queue *, unsigned int); + void (*sbfree)(struct ccp_cmd_queue *, unsigned int, unsigned int); + unsigned int (*get_free_slots)(struct ccp_cmd_queue *); + int (*init)(struct ccp_device *); + void (*destroy)(struct ccp_device *); + irqreturn_t (*irqhandler)(int, void *); +}; + +extern const struct ccp_vdata ccpv3_platform; +extern const struct ccp_vdata ccpv3; +extern const struct ccp_vdata ccpv5a; +extern const struct ccp_vdata ccpv5b; + +#endif diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c new file mode 100644 index 000000000..e416456b2 --- /dev/null +++ b/drivers/crypto/ccp/ccp-dmaengine.c @@ -0,0 +1,792 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2016,2019 Advanced Micro Devices, Inc. + * + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/dma-mapping.h> +#include <linux/dmaengine.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" +#include "../../dma/dmaengine.h" + +#define CCP_DMA_WIDTH(_mask) \ +({ \ + u64 mask = _mask + 1; \ + (mask == 0) ? 64 : fls64(mask); \ +}) + +/* The CCP as a DMA provider can be configured for public or private + * channels. Default is specified in the vdata for the device (PCI ID). + * This module parameter will override for all channels on all devices: + * dma_chan_attr = 0x2 to force all channels public + * = 0x1 to force all channels private + * = 0x0 to defer to the vdata setting + * = any other value: warning, revert to 0x0 + */ +static unsigned int dma_chan_attr = CCP_DMA_DFLT; +module_param(dma_chan_attr, uint, 0444); +MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public"); + +static unsigned int dmaengine = 1; +module_param(dmaengine, uint, 0444); +MODULE_PARM_DESC(dmaengine, "Register services with the DMA subsystem (any non-zero value, default: 1)"); + +static unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp) +{ + switch (dma_chan_attr) { + case CCP_DMA_DFLT: + return ccp->vdata->dma_chan_attr; + + case CCP_DMA_PRIV: + return DMA_PRIVATE; + + case CCP_DMA_PUB: + return 0; + + default: + dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %d\n", + dma_chan_attr); + return ccp->vdata->dma_chan_attr; + } +} + +static void ccp_free_cmd_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_cmd *cmd, *ctmp; + + list_for_each_entry_safe(cmd, ctmp, list, entry) { + list_del(&cmd->entry); + kmem_cache_free(ccp->dma_cmd_cache, cmd); + } +} + +static void ccp_free_desc_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_desc *desc, *dtmp; + + list_for_each_entry_safe(desc, dtmp, list, entry) { + ccp_free_cmd_resources(ccp, &desc->active); + ccp_free_cmd_resources(ccp, &desc->pending); + + list_del(&desc->entry); + kmem_cache_free(ccp->dma_desc_cache, desc); + } +} + +static void ccp_free_chan_resources(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s - chan=%p\n", __func__, chan); + + spin_lock_irqsave(&chan->lock, flags); + + ccp_free_desc_resources(chan->ccp, &chan->complete); + ccp_free_desc_resources(chan->ccp, &chan->active); + ccp_free_desc_resources(chan->ccp, &chan->pending); + ccp_free_desc_resources(chan->ccp, &chan->created); + + spin_unlock_irqrestore(&chan->lock, flags); +} + +static void ccp_cleanup_desc_resources(struct ccp_device *ccp, + struct list_head *list) +{ + struct ccp_dma_desc *desc, *dtmp; + + list_for_each_entry_safe_reverse(desc, dtmp, list, entry) { + if (!async_tx_test_ack(&desc->tx_desc)) + continue; + + dev_dbg(ccp->dev, "%s - desc=%p\n", __func__, desc); + + ccp_free_cmd_resources(ccp, &desc->active); + ccp_free_cmd_resources(ccp, &desc->pending); + + list_del(&desc->entry); + kmem_cache_free(ccp->dma_desc_cache, desc); + } +} + +static void ccp_do_cleanup(unsigned long data) +{ + struct ccp_dma_chan *chan = (struct ccp_dma_chan *)data; + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s - chan=%s\n", __func__, + dma_chan_name(&chan->dma_chan)); + + spin_lock_irqsave(&chan->lock, flags); + + ccp_cleanup_desc_resources(chan->ccp, &chan->complete); + + spin_unlock_irqrestore(&chan->lock, flags); +} + +static int ccp_issue_next_cmd(struct ccp_dma_desc *desc) +{ + struct ccp_dma_cmd *cmd; + int ret; + + cmd = list_first_entry(&desc->pending, struct ccp_dma_cmd, entry); + list_move(&cmd->entry, &desc->active); + + dev_dbg(desc->ccp->dev, "%s - tx %d, cmd=%p\n", __func__, + desc->tx_desc.cookie, cmd); + + ret = ccp_enqueue_cmd(&cmd->ccp_cmd); + if (!ret || (ret == -EINPROGRESS) || (ret == -EBUSY)) + return 0; + + dev_dbg(desc->ccp->dev, "%s - error: ret=%d, tx %d, cmd=%p\n", __func__, + ret, desc->tx_desc.cookie, cmd); + + return ret; +} + +static void ccp_free_active_cmd(struct ccp_dma_desc *desc) +{ + struct ccp_dma_cmd *cmd; + + cmd = list_first_entry_or_null(&desc->active, struct ccp_dma_cmd, + entry); + if (!cmd) + return; + + dev_dbg(desc->ccp->dev, "%s - freeing tx %d cmd=%p\n", + __func__, desc->tx_desc.cookie, cmd); + + list_del(&cmd->entry); + kmem_cache_free(desc->ccp->dma_cmd_cache, cmd); +} + +static struct ccp_dma_desc *__ccp_next_dma_desc(struct ccp_dma_chan *chan, + struct ccp_dma_desc *desc) +{ + /* Move current DMA descriptor to the complete list */ + if (desc) + list_move(&desc->entry, &chan->complete); + + /* Get the next DMA descriptor on the active list */ + desc = list_first_entry_or_null(&chan->active, struct ccp_dma_desc, + entry); + + return desc; +} + +static struct ccp_dma_desc *ccp_handle_active_desc(struct ccp_dma_chan *chan, + struct ccp_dma_desc *desc) +{ + struct dma_async_tx_descriptor *tx_desc; + unsigned long flags; + + /* Loop over descriptors until one is found with commands */ + do { + if (desc) { + /* Remove the DMA command from the list and free it */ + ccp_free_active_cmd(desc); + + if (!list_empty(&desc->pending)) { + /* No errors, keep going */ + if (desc->status != DMA_ERROR) + return desc; + + /* Error, free remaining commands and move on */ + ccp_free_cmd_resources(desc->ccp, + &desc->pending); + } + + tx_desc = &desc->tx_desc; + } else { + tx_desc = NULL; + } + + spin_lock_irqsave(&chan->lock, flags); + + if (desc) { + if (desc->status != DMA_ERROR) + desc->status = DMA_COMPLETE; + + dev_dbg(desc->ccp->dev, + "%s - tx %d complete, status=%u\n", __func__, + desc->tx_desc.cookie, desc->status); + + dma_cookie_complete(tx_desc); + dma_descriptor_unmap(tx_desc); + } + + desc = __ccp_next_dma_desc(chan, desc); + + spin_unlock_irqrestore(&chan->lock, flags); + + if (tx_desc) { + dmaengine_desc_get_callback_invoke(tx_desc, NULL); + + dma_run_dependencies(tx_desc); + } + } while (desc); + + return NULL; +} + +static struct ccp_dma_desc *__ccp_pending_to_active(struct ccp_dma_chan *chan) +{ + struct ccp_dma_desc *desc; + + if (list_empty(&chan->pending)) + return NULL; + + desc = list_empty(&chan->active) + ? list_first_entry(&chan->pending, struct ccp_dma_desc, entry) + : NULL; + + list_splice_tail_init(&chan->pending, &chan->active); + + return desc; +} + +static void ccp_cmd_callback(void *data, int err) +{ + struct ccp_dma_desc *desc = data; + struct ccp_dma_chan *chan; + int ret; + + if (err == -EINPROGRESS) + return; + + chan = container_of(desc->tx_desc.chan, struct ccp_dma_chan, + dma_chan); + + dev_dbg(chan->ccp->dev, "%s - tx %d callback, err=%d\n", + __func__, desc->tx_desc.cookie, err); + + if (err) + desc->status = DMA_ERROR; + + while (true) { + /* Check for DMA descriptor completion */ + desc = ccp_handle_active_desc(chan, desc); + + /* Don't submit cmd if no descriptor or DMA is paused */ + if (!desc || (chan->status == DMA_PAUSED)) + break; + + ret = ccp_issue_next_cmd(desc); + if (!ret) + break; + + desc->status = DMA_ERROR; + } + + tasklet_schedule(&chan->cleanup_tasklet); +} + +static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc) +{ + struct ccp_dma_desc *desc = container_of(tx_desc, struct ccp_dma_desc, + tx_desc); + struct ccp_dma_chan *chan; + dma_cookie_t cookie; + unsigned long flags; + + chan = container_of(tx_desc->chan, struct ccp_dma_chan, dma_chan); + + spin_lock_irqsave(&chan->lock, flags); + + cookie = dma_cookie_assign(tx_desc); + list_del(&desc->entry); + list_add_tail(&desc->entry, &chan->pending); + + spin_unlock_irqrestore(&chan->lock, flags); + + dev_dbg(chan->ccp->dev, "%s - added tx descriptor %d to pending list\n", + __func__, cookie); + + return cookie; +} + +static struct ccp_dma_cmd *ccp_alloc_dma_cmd(struct ccp_dma_chan *chan) +{ + struct ccp_dma_cmd *cmd; + + cmd = kmem_cache_alloc(chan->ccp->dma_cmd_cache, GFP_NOWAIT); + if (cmd) + memset(cmd, 0, sizeof(*cmd)); + + return cmd; +} + +static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan, + unsigned long flags) +{ + struct ccp_dma_desc *desc; + + desc = kmem_cache_zalloc(chan->ccp->dma_desc_cache, GFP_NOWAIT); + if (!desc) + return NULL; + + dma_async_tx_descriptor_init(&desc->tx_desc, &chan->dma_chan); + desc->tx_desc.flags = flags; + desc->tx_desc.tx_submit = ccp_tx_submit; + desc->ccp = chan->ccp; + INIT_LIST_HEAD(&desc->entry); + INIT_LIST_HEAD(&desc->pending); + INIT_LIST_HEAD(&desc->active); + desc->status = DMA_IN_PROGRESS; + + return desc; +} + +static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan, + struct scatterlist *dst_sg, + unsigned int dst_nents, + struct scatterlist *src_sg, + unsigned int src_nents, + unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_device *ccp = chan->ccp; + struct ccp_dma_desc *desc; + struct ccp_dma_cmd *cmd; + struct ccp_cmd *ccp_cmd; + struct ccp_passthru_nomap_engine *ccp_pt; + unsigned int src_offset, src_len; + unsigned int dst_offset, dst_len; + unsigned int len; + unsigned long sflags; + size_t total_len; + + if (!dst_sg || !src_sg) + return NULL; + + if (!dst_nents || !src_nents) + return NULL; + + desc = ccp_alloc_dma_desc(chan, flags); + if (!desc) + return NULL; + + total_len = 0; + + src_len = sg_dma_len(src_sg); + src_offset = 0; + + dst_len = sg_dma_len(dst_sg); + dst_offset = 0; + + while (true) { + if (!src_len) { + src_nents--; + if (!src_nents) + break; + + src_sg = sg_next(src_sg); + if (!src_sg) + break; + + src_len = sg_dma_len(src_sg); + src_offset = 0; + continue; + } + + if (!dst_len) { + dst_nents--; + if (!dst_nents) + break; + + dst_sg = sg_next(dst_sg); + if (!dst_sg) + break; + + dst_len = sg_dma_len(dst_sg); + dst_offset = 0; + continue; + } + + len = min(dst_len, src_len); + + cmd = ccp_alloc_dma_cmd(chan); + if (!cmd) + goto err; + + ccp_cmd = &cmd->ccp_cmd; + ccp_cmd->ccp = chan->ccp; + ccp_pt = &ccp_cmd->u.passthru_nomap; + ccp_cmd->flags = CCP_CMD_MAY_BACKLOG; + ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP; + ccp_cmd->engine = CCP_ENGINE_PASSTHRU; + ccp_pt->bit_mod = CCP_PASSTHRU_BITWISE_NOOP; + ccp_pt->byte_swap = CCP_PASSTHRU_BYTESWAP_NOOP; + ccp_pt->src_dma = sg_dma_address(src_sg) + src_offset; + ccp_pt->dst_dma = sg_dma_address(dst_sg) + dst_offset; + ccp_pt->src_len = len; + ccp_pt->final = 1; + ccp_cmd->callback = ccp_cmd_callback; + ccp_cmd->data = desc; + + list_add_tail(&cmd->entry, &desc->pending); + + dev_dbg(ccp->dev, + "%s - cmd=%p, src=%pad, dst=%pad, len=%llu\n", __func__, + cmd, &ccp_pt->src_dma, + &ccp_pt->dst_dma, ccp_pt->src_len); + + total_len += len; + + src_len -= len; + src_offset += len; + + dst_len -= len; + dst_offset += len; + } + + desc->len = total_len; + + if (list_empty(&desc->pending)) + goto err; + + dev_dbg(ccp->dev, "%s - desc=%p\n", __func__, desc); + + spin_lock_irqsave(&chan->lock, sflags); + + list_add_tail(&desc->entry, &chan->created); + + spin_unlock_irqrestore(&chan->lock, sflags); + + return desc; + +err: + ccp_free_cmd_resources(ccp, &desc->pending); + kmem_cache_free(ccp->dma_desc_cache, desc); + + return NULL; +} + +static struct dma_async_tx_descriptor *ccp_prep_dma_memcpy( + struct dma_chan *dma_chan, dma_addr_t dst, dma_addr_t src, size_t len, + unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + struct scatterlist dst_sg, src_sg; + + dev_dbg(chan->ccp->dev, + "%s - src=%pad, dst=%pad, len=%zu, flags=%#lx\n", + __func__, &src, &dst, len, flags); + + sg_init_table(&dst_sg, 1); + sg_dma_address(&dst_sg) = dst; + sg_dma_len(&dst_sg) = len; + + sg_init_table(&src_sg, 1); + sg_dma_address(&src_sg) = src; + sg_dma_len(&src_sg) = len; + + desc = ccp_create_desc(dma_chan, &dst_sg, 1, &src_sg, 1, flags); + if (!desc) + return NULL; + + return &desc->tx_desc; +} + +static struct dma_async_tx_descriptor *ccp_prep_dma_interrupt( + struct dma_chan *dma_chan, unsigned long flags) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + + desc = ccp_alloc_dma_desc(chan, flags); + if (!desc) + return NULL; + + return &desc->tx_desc; +} + +static void ccp_issue_pending(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s\n", __func__); + + spin_lock_irqsave(&chan->lock, flags); + + desc = __ccp_pending_to_active(chan); + + spin_unlock_irqrestore(&chan->lock, flags); + + /* If there was nothing active, start processing */ + if (desc) + ccp_cmd_callback(desc, 0); +} + +static enum dma_status ccp_tx_status(struct dma_chan *dma_chan, + dma_cookie_t cookie, + struct dma_tx_state *state) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + enum dma_status ret; + unsigned long flags; + + if (chan->status == DMA_PAUSED) { + ret = DMA_PAUSED; + goto out; + } + + ret = dma_cookie_status(dma_chan, cookie, state); + if (ret == DMA_COMPLETE) { + spin_lock_irqsave(&chan->lock, flags); + + /* Get status from complete chain, if still there */ + list_for_each_entry(desc, &chan->complete, entry) { + if (desc->tx_desc.cookie != cookie) + continue; + + ret = desc->status; + break; + } + + spin_unlock_irqrestore(&chan->lock, flags); + } + +out: + dev_dbg(chan->ccp->dev, "%s - %u\n", __func__, ret); + + return ret; +} + +static int ccp_pause(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + + chan->status = DMA_PAUSED; + + /*TODO: Wait for active DMA to complete before returning? */ + + return 0; +} + +static int ccp_resume(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + struct ccp_dma_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&chan->lock, flags); + + desc = list_first_entry_or_null(&chan->active, struct ccp_dma_desc, + entry); + + spin_unlock_irqrestore(&chan->lock, flags); + + /* Indicate the channel is running again */ + chan->status = DMA_IN_PROGRESS; + + /* If there was something active, re-start */ + if (desc) + ccp_cmd_callback(desc, 0); + + return 0; +} + +static int ccp_terminate_all(struct dma_chan *dma_chan) +{ + struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan, + dma_chan); + unsigned long flags; + + dev_dbg(chan->ccp->dev, "%s\n", __func__); + + /*TODO: Wait for active DMA to complete before continuing */ + + spin_lock_irqsave(&chan->lock, flags); + + /*TODO: Purge the complete list? */ + ccp_free_desc_resources(chan->ccp, &chan->active); + ccp_free_desc_resources(chan->ccp, &chan->pending); + ccp_free_desc_resources(chan->ccp, &chan->created); + + spin_unlock_irqrestore(&chan->lock, flags); + + return 0; +} + +static void ccp_dma_release(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_chan *dma_chan; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + + tasklet_kill(&chan->cleanup_tasklet); + list_del_rcu(&dma_chan->device_node); + } +} + +static void ccp_dma_release_channels(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_chan *dma_chan; + unsigned int i; + + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + + if (dma_chan->client_count) + dma_release_channel(dma_chan); + } +} + +int ccp_dmaengine_register(struct ccp_device *ccp) +{ + struct ccp_dma_chan *chan; + struct dma_device *dma_dev = &ccp->dma_dev; + struct dma_chan *dma_chan; + char *dma_cmd_cache_name; + char *dma_desc_cache_name; + unsigned int i; + int ret; + + if (!dmaengine) + return 0; + + ccp->ccp_dma_chan = devm_kcalloc(ccp->dev, ccp->cmd_q_count, + sizeof(*(ccp->ccp_dma_chan)), + GFP_KERNEL); + if (!ccp->ccp_dma_chan) + return -ENOMEM; + + dma_cmd_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL, + "%s-dmaengine-cmd-cache", + ccp->name); + if (!dma_cmd_cache_name) + return -ENOMEM; + + ccp->dma_cmd_cache = kmem_cache_create(dma_cmd_cache_name, + sizeof(struct ccp_dma_cmd), + sizeof(void *), + SLAB_HWCACHE_ALIGN, NULL); + if (!ccp->dma_cmd_cache) + return -ENOMEM; + + dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL, + "%s-dmaengine-desc-cache", + ccp->name); + if (!dma_desc_cache_name) { + ret = -ENOMEM; + goto err_cache; + } + + ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name, + sizeof(struct ccp_dma_desc), + sizeof(void *), + SLAB_HWCACHE_ALIGN, NULL); + if (!ccp->dma_desc_cache) { + ret = -ENOMEM; + goto err_cache; + } + + dma_dev->dev = ccp->dev; + dma_dev->src_addr_widths = CCP_DMA_WIDTH(dma_get_mask(ccp->dev)); + dma_dev->dst_addr_widths = CCP_DMA_WIDTH(dma_get_mask(ccp->dev)); + dma_dev->directions = DMA_MEM_TO_MEM; + dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; + dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); + dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); + + /* The DMA channels for this device can be set to public or private, + * and overridden by the module parameter dma_chan_attr. + * Default: according to the value in vdata (dma_chan_attr=0) + * dma_chan_attr=0x1: all channels private (override vdata) + * dma_chan_attr=0x2: all channels public (override vdata) + */ + if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE) + dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); + + INIT_LIST_HEAD(&dma_dev->channels); + for (i = 0; i < ccp->cmd_q_count; i++) { + chan = ccp->ccp_dma_chan + i; + dma_chan = &chan->dma_chan; + + chan->ccp = ccp; + + spin_lock_init(&chan->lock); + INIT_LIST_HEAD(&chan->created); + INIT_LIST_HEAD(&chan->pending); + INIT_LIST_HEAD(&chan->active); + INIT_LIST_HEAD(&chan->complete); + + tasklet_init(&chan->cleanup_tasklet, ccp_do_cleanup, + (unsigned long)chan); + + dma_chan->device = dma_dev; + dma_cookie_init(dma_chan); + + list_add_tail(&dma_chan->device_node, &dma_dev->channels); + } + + dma_dev->device_free_chan_resources = ccp_free_chan_resources; + dma_dev->device_prep_dma_memcpy = ccp_prep_dma_memcpy; + dma_dev->device_prep_dma_interrupt = ccp_prep_dma_interrupt; + dma_dev->device_issue_pending = ccp_issue_pending; + dma_dev->device_tx_status = ccp_tx_status; + dma_dev->device_pause = ccp_pause; + dma_dev->device_resume = ccp_resume; + dma_dev->device_terminate_all = ccp_terminate_all; + + ret = dma_async_device_register(dma_dev); + if (ret) + goto err_reg; + + return 0; + +err_reg: + ccp_dma_release(ccp); + kmem_cache_destroy(ccp->dma_desc_cache); + +err_cache: + kmem_cache_destroy(ccp->dma_cmd_cache); + + return ret; +} + +void ccp_dmaengine_unregister(struct ccp_device *ccp) +{ + struct dma_device *dma_dev = &ccp->dma_dev; + + if (!dmaengine) + return; + + ccp_dma_release_channels(ccp); + dma_async_device_unregister(dma_dev); + ccp_dma_release(ccp); + + kmem_cache_destroy(ccp->dma_desc_cache); + kmem_cache_destroy(ccp->dma_cmd_cache); +} diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c new file mode 100644 index 000000000..7e2fbba94 --- /dev/null +++ b/drivers/crypto/ccp/ccp-ops.c @@ -0,0 +1,2516 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Cryptographic Coprocessor (CCP) driver + * + * Copyright (C) 2013-2019 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/dma-mapping.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/interrupt.h> +#include <crypto/scatterwalk.h> +#include <crypto/des.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" + +/* SHA initial context values */ +static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = { + cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1), + cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3), + cpu_to_be32(SHA1_H4), +}; + +static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = { + cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1), + cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3), + cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5), + cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7), +}; + +static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = { + cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1), + cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3), + cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5), + cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7), +}; + +static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = { + cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1), + cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3), + cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5), + cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7), +}; + +static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = { + cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1), + cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3), + cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5), + cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7), +}; + +#define CCP_NEW_JOBID(ccp) ((ccp->vdata->version == CCP_VERSION(3, 0)) ? \ + ccp_gen_jobid(ccp) : 0) + +static u32 ccp_gen_jobid(struct ccp_device *ccp) +{ + return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK; +} + +static void ccp_sg_free(struct ccp_sg_workarea *wa) +{ + if (wa->dma_count) + dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir); + + wa->dma_count = 0; +} + +static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev, + struct scatterlist *sg, u64 len, + enum dma_data_direction dma_dir) +{ + memset(wa, 0, sizeof(*wa)); + + wa->sg = sg; + if (!sg) + return 0; + + wa->nents = sg_nents_for_len(sg, len); + if (wa->nents < 0) + return wa->nents; + + wa->bytes_left = len; + wa->sg_used = 0; + + if (len == 0) + return 0; + + if (dma_dir == DMA_NONE) + return 0; + + wa->dma_sg = sg; + wa->dma_sg_head = sg; + wa->dma_dev = dev; + wa->dma_dir = dma_dir; + wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir); + if (!wa->dma_count) + return -ENOMEM; + + return 0; +} + +static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len) +{ + unsigned int nbytes = min_t(u64, len, wa->bytes_left); + unsigned int sg_combined_len = 0; + + if (!wa->sg) + return; + + wa->sg_used += nbytes; + wa->bytes_left -= nbytes; + if (wa->sg_used == sg_dma_len(wa->dma_sg)) { + /* Advance to the next DMA scatterlist entry */ + wa->dma_sg = sg_next(wa->dma_sg); + + /* In the case that the DMA mapped scatterlist has entries + * that have been merged, the non-DMA mapped scatterlist + * must be advanced multiple times for each merged entry. + * This ensures that the current non-DMA mapped entry + * corresponds to the current DMA mapped entry. + */ + do { + sg_combined_len += wa->sg->length; + wa->sg = sg_next(wa->sg); + } while (wa->sg_used > sg_combined_len); + + wa->sg_used = 0; + } +} + +static void ccp_dm_free(struct ccp_dm_workarea *wa) +{ + if (wa->length <= CCP_DMAPOOL_MAX_SIZE) { + if (wa->address) + dma_pool_free(wa->dma_pool, wa->address, + wa->dma.address); + } else { + if (wa->dma.address) + dma_unmap_single(wa->dev, wa->dma.address, wa->length, + wa->dma.dir); + kfree(wa->address); + } + + wa->address = NULL; + wa->dma.address = 0; +} + +static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa, + struct ccp_cmd_queue *cmd_q, + unsigned int len, + enum dma_data_direction dir) +{ + memset(wa, 0, sizeof(*wa)); + + if (!len) + return 0; + + wa->dev = cmd_q->ccp->dev; + wa->length = len; + + if (len <= CCP_DMAPOOL_MAX_SIZE) { + wa->dma_pool = cmd_q->dma_pool; + + wa->address = dma_pool_zalloc(wa->dma_pool, GFP_KERNEL, + &wa->dma.address); + if (!wa->address) + return -ENOMEM; + + wa->dma.length = CCP_DMAPOOL_MAX_SIZE; + + } else { + wa->address = kzalloc(len, GFP_KERNEL); + if (!wa->address) + return -ENOMEM; + + wa->dma.address = dma_map_single(wa->dev, wa->address, len, + dir); + if (dma_mapping_error(wa->dev, wa->dma.address)) { + kfree(wa->address); + wa->address = NULL; + return -ENOMEM; + } + + wa->dma.length = len; + } + wa->dma.dir = dir; + + return 0; +} + +static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, + struct scatterlist *sg, unsigned int sg_offset, + unsigned int len) +{ + WARN_ON(!wa->address); + + if (len > (wa->length - wa_offset)) + return -EINVAL; + + scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, + 0); + return 0; +} + +static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset, + struct scatterlist *sg, unsigned int sg_offset, + unsigned int len) +{ + WARN_ON(!wa->address); + + scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, + 1); +} + +static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa, + unsigned int wa_offset, + struct scatterlist *sg, + unsigned int sg_offset, + unsigned int len) +{ + u8 *p, *q; + int rc; + + rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len); + if (rc) + return rc; + + p = wa->address + wa_offset; + q = p + len - 1; + while (p < q) { + *p = *p ^ *q; + *q = *p ^ *q; + *p = *p ^ *q; + p++; + q--; + } + return 0; +} + +static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa, + unsigned int wa_offset, + struct scatterlist *sg, + unsigned int sg_offset, + unsigned int len) +{ + u8 *p, *q; + + p = wa->address + wa_offset; + q = p + len - 1; + while (p < q) { + *p = *p ^ *q; + *q = *p ^ *q; + *p = *p ^ *q; + p++; + q--; + } + + ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len); +} + +static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q) +{ + ccp_dm_free(&data->dm_wa); + ccp_sg_free(&data->sg_wa); +} + +static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q, + struct scatterlist *sg, u64 sg_len, + unsigned int dm_len, + enum dma_data_direction dir) +{ + int ret; + + memset(data, 0, sizeof(*data)); + + ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len, + dir); + if (ret) + goto e_err; + + ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir); + if (ret) + goto e_err; + + return 0; + +e_err: + ccp_free_data(data, cmd_q); + + return ret; +} + +static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from) +{ + struct ccp_sg_workarea *sg_wa = &data->sg_wa; + struct ccp_dm_workarea *dm_wa = &data->dm_wa; + unsigned int buf_count, nbytes; + + /* Clear the buffer if setting it */ + if (!from) + memset(dm_wa->address, 0, dm_wa->length); + + if (!sg_wa->sg) + return 0; + + /* Perform the copy operation + * nbytes will always be <= UINT_MAX because dm_wa->length is + * an unsigned int + */ + nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length); + scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used, + nbytes, from); + + /* Update the structures and generate the count */ + buf_count = 0; + while (sg_wa->bytes_left && (buf_count < dm_wa->length)) { + nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used, + dm_wa->length - buf_count); + nbytes = min_t(u64, sg_wa->bytes_left, nbytes); + + buf_count += nbytes; + ccp_update_sg_workarea(sg_wa, nbytes); + } + + return buf_count; +} + +static unsigned int ccp_fill_queue_buf(struct ccp_data *data) +{ + return ccp_queue_buf(data, 0); +} + +static unsigned int ccp_empty_queue_buf(struct ccp_data *data) +{ + return ccp_queue_buf(data, 1); +} + +static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst, + struct ccp_op *op, unsigned int block_size, + bool blocksize_op) +{ + unsigned int sg_src_len, sg_dst_len, op_len; + + /* The CCP can only DMA from/to one address each per operation. This + * requires that we find the smallest DMA area between the source + * and destination. The resulting len values will always be <= UINT_MAX + * because the dma length is an unsigned int. + */ + sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used; + sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len); + + if (dst) { + sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used; + sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len); + op_len = min(sg_src_len, sg_dst_len); + } else { + op_len = sg_src_len; + } + + /* The data operation length will be at least block_size in length + * or the smaller of available sg room remaining for the source or + * the destination + */ + op_len = max(op_len, block_size); + + /* Unless we have to buffer data, there's no reason to wait */ + op->soc = 0; + + if (sg_src_len < block_size) { + /* Not enough data in the sg element, so it + * needs to be buffered into a blocksize chunk + */ + int cp_len = ccp_fill_queue_buf(src); + + op->soc = 1; + op->src.u.dma.address = src->dm_wa.dma.address; + op->src.u.dma.offset = 0; + op->src.u.dma.length = (blocksize_op) ? block_size : cp_len; + } else { + /* Enough data in the sg element, but we need to + * adjust for any previously copied data + */ + op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg); + op->src.u.dma.offset = src->sg_wa.sg_used; + op->src.u.dma.length = op_len & ~(block_size - 1); + + ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length); + } + + if (dst) { + if (sg_dst_len < block_size) { + /* Not enough room in the sg element or we're on the + * last piece of data (when using padding), so the + * output needs to be buffered into a blocksize chunk + */ + op->soc = 1; + op->dst.u.dma.address = dst->dm_wa.dma.address; + op->dst.u.dma.offset = 0; + op->dst.u.dma.length = op->src.u.dma.length; + } else { + /* Enough room in the sg element, but we need to + * adjust for any previously used area + */ + op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg); + op->dst.u.dma.offset = dst->sg_wa.sg_used; + op->dst.u.dma.length = op->src.u.dma.length; + } + } +} + +static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst, + struct ccp_op *op) +{ + op->init = 0; + + if (dst) { + if (op->dst.u.dma.address == dst->dm_wa.dma.address) + ccp_empty_queue_buf(dst); + else + ccp_update_sg_workarea(&dst->sg_wa, + op->dst.u.dma.length); + } +} + +static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 sb, + u32 byte_swap, bool from) +{ + struct ccp_op op; + + memset(&op, 0, sizeof(op)); + + op.cmd_q = cmd_q; + op.jobid = jobid; + op.eom = 1; + + if (from) { + op.soc = 1; + op.src.type = CCP_MEMTYPE_SB; + op.src.u.sb = sb; + op.dst.type = CCP_MEMTYPE_SYSTEM; + op.dst.u.dma.address = wa->dma.address; + op.dst.u.dma.length = wa->length; + } else { + op.src.type = CCP_MEMTYPE_SYSTEM; + op.src.u.dma.address = wa->dma.address; + op.src.u.dma.length = wa->length; + op.dst.type = CCP_MEMTYPE_SB; + op.dst.u.sb = sb; + } + + op.u.passthru.byte_swap = byte_swap; + + return cmd_q->ccp->vdata->perform->passthru(&op); +} + +static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 sb, + u32 byte_swap) +{ + return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false); +} + +static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q, + struct ccp_dm_workarea *wa, u32 jobid, u32 sb, + u32 byte_swap) +{ + return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true); +} + +static noinline_for_stack int +ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_aes_engine *aes = &cmd->u.aes; + struct ccp_dm_workarea key, ctx; + struct ccp_data src; + struct ccp_op op; + unsigned int dm_offset; + int ret; + + if (!((aes->key_len == AES_KEYSIZE_128) || + (aes->key_len == AES_KEYSIZE_192) || + (aes->key_len == AES_KEYSIZE_256))) + return -EINVAL; + + if (aes->src_len & (AES_BLOCK_SIZE - 1)) + return -EINVAL; + + if (aes->iv_len != AES_BLOCK_SIZE) + return -EINVAL; + + if (!aes->key || !aes->iv || !aes->src) + return -EINVAL; + + if (aes->cmac_final) { + if (aes->cmac_key_len != AES_BLOCK_SIZE) + return -EINVAL; + + if (!aes->cmac_key) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1); + BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1); + + ret = -EIO; + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_key = cmd_q->sb_key; + op.sb_ctx = cmd_q->sb_ctx; + op.init = 1; + op.u.aes.type = aes->type; + op.u.aes.mode = aes->mode; + op.u.aes.action = aes->action; + + /* All supported key sizes fit in a single (32-byte) SB entry + * and must be in little endian format. Use the 256-bit byte + * swap passthru option to convert from big endian to little + * endian. + */ + ret = ccp_init_dm_workarea(&key, cmd_q, + CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + dm_offset = CCP_SB_BYTES - aes->key_len; + ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); + if (ret) + goto e_key; + ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_key; + } + + /* The AES context fits in a single (32-byte) SB entry and + * must be in little endian format. Use the 256-bit byte swap + * passthru option to convert from big endian to little endian. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + goto e_key; + + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; + ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + if (ret) + goto e_ctx; + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + + /* Send data to the CCP AES engine */ + ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, + AES_BLOCK_SIZE, DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true); + if (aes->cmac_final && !src.sg_wa.bytes_left) { + op.eom = 1; + + /* Push the K1/K2 key to the CCP now */ + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, + op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_src; + } + + ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0, + aes->cmac_key_len); + if (ret) + goto e_src; + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_src; + } + } + + ret = cmd_q->ccp->vdata->perform->aes(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_src; + } + + ccp_process_data(&src, NULL, &op); + } + + /* Retrieve the AES context - convert from LE to BE using + * 32-byte (256-bit) byteswapping + */ + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_src; + } + + /* ...but we only need AES_BLOCK_SIZE bytes */ + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; + ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + +e_src: + ccp_free_data(&src, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + +e_key: + ccp_dm_free(&key); + + return ret; +} + +static noinline_for_stack int +ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_aes_engine *aes = &cmd->u.aes; + struct ccp_dm_workarea key, ctx, final_wa, tag; + struct ccp_data src, dst; + struct ccp_data aad; + struct ccp_op op; + unsigned int dm_offset; + unsigned int authsize; + unsigned int jobid; + unsigned int ilen; + bool in_place = true; /* Default value */ + __be64 *final; + int ret; + + struct scatterlist *p_inp, sg_inp[2]; + struct scatterlist *p_tag, sg_tag[2]; + struct scatterlist *p_outp, sg_outp[2]; + struct scatterlist *p_aad; + + if (!aes->iv) + return -EINVAL; + + if (!((aes->key_len == AES_KEYSIZE_128) || + (aes->key_len == AES_KEYSIZE_192) || + (aes->key_len == AES_KEYSIZE_256))) + return -EINVAL; + + if (!aes->key) /* Gotta have a key SGL */ + return -EINVAL; + + /* Zero defaults to 16 bytes, the maximum size */ + authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE; + switch (authsize) { + case 16: + case 15: + case 14: + case 13: + case 12: + case 8: + case 4: + break; + default: + return -EINVAL; + } + + /* First, decompose the source buffer into AAD & PT, + * and the destination buffer into AAD, CT & tag, or + * the input into CT & tag. + * It is expected that the input and output SGs will + * be valid, even if the AAD and input lengths are 0. + */ + p_aad = aes->src; + p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len); + p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len); + if (aes->action == CCP_AES_ACTION_ENCRYPT) { + ilen = aes->src_len; + p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen); + } else { + /* Input length for decryption includes tag */ + ilen = aes->src_len - authsize; + p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen); + } + + jobid = CCP_NEW_JOBID(cmd_q->ccp); + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = jobid; + op.sb_key = cmd_q->sb_key; /* Pre-allocated */ + op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ + op.init = 1; + op.u.aes.type = aes->type; + + /* Copy the key to the LSB */ + ret = ccp_init_dm_workarea(&key, cmd_q, + CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + dm_offset = CCP_SB_BYTES - aes->key_len; + ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); + if (ret) + goto e_key; + ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_key; + } + + /* Copy the context (IV) to the LSB. + * There is an assumption here that the IV is 96 bits in length, plus + * a nonce of 32 bits. If no IV is present, use a zeroed buffer. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + goto e_key; + + dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len; + ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + if (ret) + goto e_ctx; + + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + + op.init = 1; + if (aes->aad_len > 0) { + /* Step 1: Run a GHASH over the Additional Authenticated Data */ + ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len, + AES_BLOCK_SIZE, + DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + op.u.aes.mode = CCP_AES_MODE_GHASH; + op.u.aes.action = CCP_AES_GHASHAAD; + + while (aad.sg_wa.bytes_left) { + ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true); + + ret = cmd_q->ccp->vdata->perform->aes(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_aad; + } + + ccp_process_data(&aad, NULL, &op); + op.init = 0; + } + } + + op.u.aes.mode = CCP_AES_MODE_GCTR; + op.u.aes.action = aes->action; + + if (ilen > 0) { + /* Step 2: Run a GCTR over the plaintext */ + in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false; + + ret = ccp_init_data(&src, cmd_q, p_inp, ilen, + AES_BLOCK_SIZE, + in_place ? DMA_BIDIRECTIONAL + : DMA_TO_DEVICE); + if (ret) + goto e_aad; + + if (in_place) { + dst = src; + } else { + ret = ccp_init_data(&dst, cmd_q, p_outp, ilen, + AES_BLOCK_SIZE, DMA_FROM_DEVICE); + if (ret) + goto e_src; + } + + op.soc = 0; + op.eom = 0; + op.init = 1; + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); + if (!src.sg_wa.bytes_left) { + unsigned int nbytes = ilen % AES_BLOCK_SIZE; + + if (nbytes) { + op.eom = 1; + op.u.aes.size = (nbytes * 8) - 1; + } + } + + ret = cmd_q->ccp->vdata->perform->aes(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ccp_process_data(&src, &dst, &op); + op.init = 0; + } + } + + /* Step 3: Update the IV portion of the context with the original IV */ + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + if (ret) + goto e_dst; + + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + /* Step 4: Concatenate the lengths of the AAD and source, and + * hash that 16 byte buffer. + */ + ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE, + DMA_BIDIRECTIONAL); + if (ret) + goto e_dst; + final = (__be64 *)final_wa.address; + final[0] = cpu_to_be64(aes->aad_len * 8); + final[1] = cpu_to_be64(ilen * 8); + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = jobid; + op.sb_key = cmd_q->sb_key; /* Pre-allocated */ + op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ + op.init = 1; + op.u.aes.type = aes->type; + op.u.aes.mode = CCP_AES_MODE_GHASH; + op.u.aes.action = CCP_AES_GHASHFINAL; + op.src.type = CCP_MEMTYPE_SYSTEM; + op.src.u.dma.address = final_wa.dma.address; + op.src.u.dma.length = AES_BLOCK_SIZE; + op.dst.type = CCP_MEMTYPE_SYSTEM; + op.dst.u.dma.address = final_wa.dma.address; + op.dst.u.dma.length = AES_BLOCK_SIZE; + op.eom = 1; + op.u.aes.size = 0; + ret = cmd_q->ccp->vdata->perform->aes(&op); + if (ret) + goto e_final_wa; + + if (aes->action == CCP_AES_ACTION_ENCRYPT) { + /* Put the ciphered tag after the ciphertext. */ + ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize); + } else { + /* Does this ciphered tag match the input? */ + ret = ccp_init_dm_workarea(&tag, cmd_q, authsize, + DMA_BIDIRECTIONAL); + if (ret) + goto e_final_wa; + ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize); + if (ret) { + ccp_dm_free(&tag); + goto e_final_wa; + } + + ret = crypto_memneq(tag.address, final_wa.address, + authsize) ? -EBADMSG : 0; + ccp_dm_free(&tag); + } + +e_final_wa: + ccp_dm_free(&final_wa); + +e_dst: + if (ilen > 0 && !in_place) + ccp_free_data(&dst, cmd_q); + +e_src: + if (ilen > 0) + ccp_free_data(&src, cmd_q); + +e_aad: + if (aes->aad_len) + ccp_free_data(&aad, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + +e_key: + ccp_dm_free(&key); + + return ret; +} + +static noinline_for_stack int +ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_aes_engine *aes = &cmd->u.aes; + struct ccp_dm_workarea key, ctx; + struct ccp_data src, dst; + struct ccp_op op; + unsigned int dm_offset; + bool in_place = false; + int ret; + + if (!((aes->key_len == AES_KEYSIZE_128) || + (aes->key_len == AES_KEYSIZE_192) || + (aes->key_len == AES_KEYSIZE_256))) + return -EINVAL; + + if (((aes->mode == CCP_AES_MODE_ECB) || + (aes->mode == CCP_AES_MODE_CBC)) && + (aes->src_len & (AES_BLOCK_SIZE - 1))) + return -EINVAL; + + if (!aes->key || !aes->src || !aes->dst) + return -EINVAL; + + if (aes->mode != CCP_AES_MODE_ECB) { + if (aes->iv_len != AES_BLOCK_SIZE) + return -EINVAL; + + if (!aes->iv) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1); + BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1); + + ret = -EIO; + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_key = cmd_q->sb_key; + op.sb_ctx = cmd_q->sb_ctx; + op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1; + op.u.aes.type = aes->type; + op.u.aes.mode = aes->mode; + op.u.aes.action = aes->action; + + /* All supported key sizes fit in a single (32-byte) SB entry + * and must be in little endian format. Use the 256-bit byte + * swap passthru option to convert from big endian to little + * endian. + */ + ret = ccp_init_dm_workarea(&key, cmd_q, + CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + dm_offset = CCP_SB_BYTES - aes->key_len; + ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len); + if (ret) + goto e_key; + ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_key; + } + + /* The AES context fits in a single (32-byte) SB entry and + * must be in little endian format. Use the 256-bit byte swap + * passthru option to convert from big endian to little endian. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + goto e_key; + + if (aes->mode != CCP_AES_MODE_ECB) { + /* Load the AES context - convert to LE */ + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; + ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + if (ret) + goto e_ctx; + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + } + switch (aes->mode) { + case CCP_AES_MODE_CFB: /* CFB128 only */ + case CCP_AES_MODE_CTR: + op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1; + break; + default: + op.u.aes.size = 0; + } + + /* Prepare the input and output data workareas. For in-place + * operations we need to set the dma direction to BIDIRECTIONAL + * and copy the src workarea to the dst workarea. + */ + if (sg_virt(aes->src) == sg_virt(aes->dst)) + in_place = true; + + ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len, + AES_BLOCK_SIZE, + in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + if (in_place) { + dst = src; + } else { + ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len, + AES_BLOCK_SIZE, DMA_FROM_DEVICE); + if (ret) + goto e_src; + } + + /* Send data to the CCP AES engine */ + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true); + if (!src.sg_wa.bytes_left) { + op.eom = 1; + + /* Since we don't retrieve the AES context in ECB + * mode we have to wait for the operation to complete + * on the last piece of data + */ + if (aes->mode == CCP_AES_MODE_ECB) + op.soc = 1; + } + + ret = cmd_q->ccp->vdata->perform->aes(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ccp_process_data(&src, &dst, &op); + } + + if (aes->mode != CCP_AES_MODE_ECB) { + /* Retrieve the AES context - convert from LE to BE using + * 32-byte (256-bit) byteswapping + */ + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + /* ...but we only need AES_BLOCK_SIZE bytes */ + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; + ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len); + } + +e_dst: + if (!in_place) + ccp_free_data(&dst, cmd_q); + +e_src: + ccp_free_data(&src, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + +e_key: + ccp_dm_free(&key); + + return ret; +} + +static noinline_for_stack int +ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_xts_aes_engine *xts = &cmd->u.xts; + struct ccp_dm_workarea key, ctx; + struct ccp_data src, dst; + struct ccp_op op; + unsigned int unit_size, dm_offset; + bool in_place = false; + unsigned int sb_count; + enum ccp_aes_type aestype; + int ret; + + switch (xts->unit_size) { + case CCP_XTS_AES_UNIT_SIZE_16: + unit_size = 16; + break; + case CCP_XTS_AES_UNIT_SIZE_512: + unit_size = 512; + break; + case CCP_XTS_AES_UNIT_SIZE_1024: + unit_size = 1024; + break; + case CCP_XTS_AES_UNIT_SIZE_2048: + unit_size = 2048; + break; + case CCP_XTS_AES_UNIT_SIZE_4096: + unit_size = 4096; + break; + + default: + return -EINVAL; + } + + if (xts->key_len == AES_KEYSIZE_128) + aestype = CCP_AES_TYPE_128; + else if (xts->key_len == AES_KEYSIZE_256) + aestype = CCP_AES_TYPE_256; + else + return -EINVAL; + + if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1))) + return -EINVAL; + + if (xts->iv_len != AES_BLOCK_SIZE) + return -EINVAL; + + if (!xts->key || !xts->iv || !xts->src || !xts->dst) + return -EINVAL; + + BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1); + BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1); + + ret = -EIO; + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_key = cmd_q->sb_key; + op.sb_ctx = cmd_q->sb_ctx; + op.init = 1; + op.u.xts.type = aestype; + op.u.xts.action = xts->action; + op.u.xts.unit_size = xts->unit_size; + + /* A version 3 device only supports 128-bit keys, which fits into a + * single SB entry. A version 5 device uses a 512-bit vector, so two + * SB entries. + */ + if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) + sb_count = CCP_XTS_AES_KEY_SB_COUNT; + else + sb_count = CCP5_XTS_AES_KEY_SB_COUNT; + ret = ccp_init_dm_workarea(&key, cmd_q, + sb_count * CCP_SB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) { + /* All supported key sizes must be in little endian format. + * Use the 256-bit byte swap passthru option to convert from + * big endian to little endian. + */ + dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128; + ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len); + if (ret) + goto e_key; + ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len); + if (ret) + goto e_key; + } else { + /* Version 5 CCPs use a 512-bit space for the key: each portion + * occupies 256 bits, or one entire slot, and is zero-padded. + */ + unsigned int pad; + + dm_offset = CCP_SB_BYTES; + pad = dm_offset - xts->key_len; + ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len); + if (ret) + goto e_key; + ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key, + xts->key_len, xts->key_len); + if (ret) + goto e_key; + } + ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_key; + } + + /* The AES context fits in a single (32-byte) SB entry and + * for XTS is already in little endian format so no byte swapping + * is needed. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + goto e_key; + + ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len); + if (ret) + goto e_ctx; + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + + /* Prepare the input and output data workareas. For in-place + * operations we need to set the dma direction to BIDIRECTIONAL + * and copy the src workarea to the dst workarea. + */ + if (sg_virt(xts->src) == sg_virt(xts->dst)) + in_place = true; + + ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len, + unit_size, + in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + if (in_place) { + dst = src; + } else { + ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len, + unit_size, DMA_FROM_DEVICE); + if (ret) + goto e_src; + } + + /* Send data to the CCP AES engine */ + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, &dst, &op, unit_size, true); + if (!src.sg_wa.bytes_left) + op.eom = 1; + + ret = cmd_q->ccp->vdata->perform->xts_aes(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ccp_process_data(&src, &dst, &op); + } + + /* Retrieve the AES context - convert from LE to BE using + * 32-byte (256-bit) byteswapping + */ + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + /* ...but we only need AES_BLOCK_SIZE bytes */ + dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE; + ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len); + +e_dst: + if (!in_place) + ccp_free_data(&dst, cmd_q); + +e_src: + ccp_free_data(&src, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + +e_key: + ccp_dm_free(&key); + + return ret; +} + +static noinline_for_stack int +ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_des3_engine *des3 = &cmd->u.des3; + + struct ccp_dm_workarea key, ctx; + struct ccp_data src, dst; + struct ccp_op op; + unsigned int dm_offset; + unsigned int len_singlekey; + bool in_place = false; + int ret; + + /* Error checks */ + if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) + return -EINVAL; + + if (!cmd_q->ccp->vdata->perform->des3) + return -EINVAL; + + if (des3->key_len != DES3_EDE_KEY_SIZE) + return -EINVAL; + + if (((des3->mode == CCP_DES3_MODE_ECB) || + (des3->mode == CCP_DES3_MODE_CBC)) && + (des3->src_len & (DES3_EDE_BLOCK_SIZE - 1))) + return -EINVAL; + + if (!des3->key || !des3->src || !des3->dst) + return -EINVAL; + + if (des3->mode != CCP_DES3_MODE_ECB) { + if (des3->iv_len != DES3_EDE_BLOCK_SIZE) + return -EINVAL; + + if (!des3->iv) + return -EINVAL; + } + + /* Zero out all the fields of the command desc */ + memset(&op, 0, sizeof(op)); + + /* Set up the Function field */ + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_key = cmd_q->sb_key; + + op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1; + op.u.des3.type = des3->type; + op.u.des3.mode = des3->mode; + op.u.des3.action = des3->action; + + /* + * All supported key sizes fit in a single (32-byte) KSB entry and + * (like AES) must be in little endian format. Use the 256-bit byte + * swap passthru option to convert from big endian to little endian. + */ + ret = ccp_init_dm_workarea(&key, cmd_q, + CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + /* + * The contents of the key triplet are in the reverse order of what + * is required by the engine. Copy the 3 pieces individually to put + * them where they belong. + */ + dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */ + + len_singlekey = des3->key_len / 3; + ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey, + des3->key, 0, len_singlekey); + if (ret) + goto e_key; + ret = ccp_set_dm_area(&key, dm_offset + len_singlekey, + des3->key, len_singlekey, len_singlekey); + if (ret) + goto e_key; + ret = ccp_set_dm_area(&key, dm_offset, + des3->key, 2 * len_singlekey, len_singlekey); + if (ret) + goto e_key; + + /* Copy the key to the SB */ + ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_key; + } + + /* + * The DES3 context fits in a single (32-byte) KSB entry and + * must be in little endian format. Use the 256-bit byte swap + * passthru option to convert from big endian to little endian. + */ + if (des3->mode != CCP_DES3_MODE_ECB) { + op.sb_ctx = cmd_q->sb_ctx; + + ret = ccp_init_dm_workarea(&ctx, cmd_q, + CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + goto e_key; + + /* Load the context into the LSB */ + dm_offset = CCP_SB_BYTES - des3->iv_len; + ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0, + des3->iv_len); + if (ret) + goto e_ctx; + + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + } + + /* + * Prepare the input and output data workareas. For in-place + * operations we need to set the dma direction to BIDIRECTIONAL + * and copy the src workarea to the dst workarea. + */ + if (sg_virt(des3->src) == sg_virt(des3->dst)) + in_place = true; + + ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len, + DES3_EDE_BLOCK_SIZE, + in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + if (in_place) + dst = src; + else { + ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len, + DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE); + if (ret) + goto e_src; + } + + /* Send data to the CCP DES3 engine */ + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true); + if (!src.sg_wa.bytes_left) { + op.eom = 1; + + /* Since we don't retrieve the context in ECB mode + * we have to wait for the operation to complete + * on the last piece of data + */ + op.soc = 0; + } + + ret = cmd_q->ccp->vdata->perform->des3(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ccp_process_data(&src, &dst, &op); + } + + if (des3->mode != CCP_DES3_MODE_ECB) { + /* Retrieve the context and make BE */ + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + /* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */ + ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0, + DES3_EDE_BLOCK_SIZE); + } +e_dst: + if (!in_place) + ccp_free_data(&dst, cmd_q); + +e_src: + ccp_free_data(&src, cmd_q); + +e_ctx: + if (des3->mode != CCP_DES3_MODE_ECB) + ccp_dm_free(&ctx); + +e_key: + ccp_dm_free(&key); + + return ret; +} + +static noinline_for_stack int +ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_sha_engine *sha = &cmd->u.sha; + struct ccp_dm_workarea ctx; + struct ccp_data src; + struct ccp_op op; + unsigned int ioffset, ooffset; + unsigned int digest_size; + int sb_count; + const void *init; + u64 block_size; + int ctx_size; + int ret; + + switch (sha->type) { + case CCP_SHA_TYPE_1: + if (sha->ctx_len < SHA1_DIGEST_SIZE) + return -EINVAL; + block_size = SHA1_BLOCK_SIZE; + break; + case CCP_SHA_TYPE_224: + if (sha->ctx_len < SHA224_DIGEST_SIZE) + return -EINVAL; + block_size = SHA224_BLOCK_SIZE; + break; + case CCP_SHA_TYPE_256: + if (sha->ctx_len < SHA256_DIGEST_SIZE) + return -EINVAL; + block_size = SHA256_BLOCK_SIZE; + break; + case CCP_SHA_TYPE_384: + if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0) + || sha->ctx_len < SHA384_DIGEST_SIZE) + return -EINVAL; + block_size = SHA384_BLOCK_SIZE; + break; + case CCP_SHA_TYPE_512: + if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0) + || sha->ctx_len < SHA512_DIGEST_SIZE) + return -EINVAL; + block_size = SHA512_BLOCK_SIZE; + break; + default: + return -EINVAL; + } + + if (!sha->ctx) + return -EINVAL; + + if (!sha->final && (sha->src_len & (block_size - 1))) + return -EINVAL; + + /* The version 3 device can't handle zero-length input */ + if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) { + + if (!sha->src_len) { + unsigned int digest_len; + const u8 *sha_zero; + + /* Not final, just return */ + if (!sha->final) + return 0; + + /* CCP can't do a zero length sha operation so the + * caller must buffer the data. + */ + if (sha->msg_bits) + return -EINVAL; + + /* The CCP cannot perform zero-length sha operations + * so the caller is required to buffer data for the + * final operation. However, a sha operation for a + * message with a total length of zero is valid so + * known values are required to supply the result. + */ + switch (sha->type) { + case CCP_SHA_TYPE_1: + sha_zero = sha1_zero_message_hash; + digest_len = SHA1_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_224: + sha_zero = sha224_zero_message_hash; + digest_len = SHA224_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_256: + sha_zero = sha256_zero_message_hash; + digest_len = SHA256_DIGEST_SIZE; + break; + default: + return -EINVAL; + } + + scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0, + digest_len, 1); + + return 0; + } + } + + /* Set variables used throughout */ + switch (sha->type) { + case CCP_SHA_TYPE_1: + digest_size = SHA1_DIGEST_SIZE; + init = (void *) ccp_sha1_init; + ctx_size = SHA1_DIGEST_SIZE; + sb_count = 1; + if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0)) + ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE; + else + ooffset = ioffset = 0; + break; + case CCP_SHA_TYPE_224: + digest_size = SHA224_DIGEST_SIZE; + init = (void *) ccp_sha224_init; + ctx_size = SHA256_DIGEST_SIZE; + sb_count = 1; + ioffset = 0; + if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0)) + ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE; + else + ooffset = 0; + break; + case CCP_SHA_TYPE_256: + digest_size = SHA256_DIGEST_SIZE; + init = (void *) ccp_sha256_init; + ctx_size = SHA256_DIGEST_SIZE; + sb_count = 1; + ooffset = ioffset = 0; + break; + case CCP_SHA_TYPE_384: + digest_size = SHA384_DIGEST_SIZE; + init = (void *) ccp_sha384_init; + ctx_size = SHA512_DIGEST_SIZE; + sb_count = 2; + ioffset = 0; + ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_512: + digest_size = SHA512_DIGEST_SIZE; + init = (void *) ccp_sha512_init; + ctx_size = SHA512_DIGEST_SIZE; + sb_count = 2; + ooffset = ioffset = 0; + break; + default: + ret = -EINVAL; + goto e_data; + } + + /* For zero-length plaintext the src pointer is ignored; + * otherwise both parts must be valid + */ + if (sha->src_len && !sha->src) + return -EINVAL; + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */ + op.u.sha.type = sha->type; + op.u.sha.msg_bits = sha->msg_bits; + + /* For SHA1/224/256 the context fits in a single (32-byte) SB entry; + * SHA384/512 require 2 adjacent SB slots, with the right half in the + * first slot, and the left half in the second. Each portion must then + * be in little endian format: use the 256-bit byte swap option. + */ + ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES, + DMA_BIDIRECTIONAL); + if (ret) + return ret; + if (sha->first) { + switch (sha->type) { + case CCP_SHA_TYPE_1: + case CCP_SHA_TYPE_224: + case CCP_SHA_TYPE_256: + memcpy(ctx.address + ioffset, init, ctx_size); + break; + case CCP_SHA_TYPE_384: + case CCP_SHA_TYPE_512: + memcpy(ctx.address + ctx_size / 2, init, + ctx_size / 2); + memcpy(ctx.address, init + ctx_size / 2, + ctx_size / 2); + break; + default: + ret = -EINVAL; + goto e_ctx; + } + } else { + /* Restore the context */ + ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0, + sb_count * CCP_SB_BYTES); + if (ret) + goto e_ctx; + } + + ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_ctx; + } + + if (sha->src) { + /* Send data to the CCP SHA engine; block_size is set above */ + ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len, + block_size, DMA_TO_DEVICE); + if (ret) + goto e_ctx; + + while (src.sg_wa.bytes_left) { + ccp_prepare_data(&src, NULL, &op, block_size, false); + if (sha->final && !src.sg_wa.bytes_left) + op.eom = 1; + + ret = cmd_q->ccp->vdata->perform->sha(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_data; + } + + ccp_process_data(&src, NULL, &op); + } + } else { + op.eom = 1; + ret = cmd_q->ccp->vdata->perform->sha(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_data; + } + } + + /* Retrieve the SHA context - convert from LE to BE using + * 32-byte (256-bit) byteswapping to BE + */ + ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx, + CCP_PASSTHRU_BYTESWAP_256BIT); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_data; + } + + if (sha->final) { + /* Finishing up, so get the digest */ + switch (sha->type) { + case CCP_SHA_TYPE_1: + case CCP_SHA_TYPE_224: + case CCP_SHA_TYPE_256: + ccp_get_dm_area(&ctx, ooffset, + sha->ctx, 0, + digest_size); + break; + case CCP_SHA_TYPE_384: + case CCP_SHA_TYPE_512: + ccp_get_dm_area(&ctx, 0, + sha->ctx, LSB_ITEM_SIZE - ooffset, + LSB_ITEM_SIZE); + ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset, + sha->ctx, 0, + LSB_ITEM_SIZE - ooffset); + break; + default: + ret = -EINVAL; + goto e_data; + } + } else { + /* Stash the context */ + ccp_get_dm_area(&ctx, 0, sha->ctx, 0, + sb_count * CCP_SB_BYTES); + } + + if (sha->final && sha->opad) { + /* HMAC operation, recursively perform final SHA */ + struct ccp_cmd hmac_cmd; + struct scatterlist sg; + u8 *hmac_buf; + + if (sha->opad_len != block_size) { + ret = -EINVAL; + goto e_data; + } + + hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL); + if (!hmac_buf) { + ret = -ENOMEM; + goto e_data; + } + sg_init_one(&sg, hmac_buf, block_size + digest_size); + + scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0); + switch (sha->type) { + case CCP_SHA_TYPE_1: + case CCP_SHA_TYPE_224: + case CCP_SHA_TYPE_256: + memcpy(hmac_buf + block_size, + ctx.address + ooffset, + digest_size); + break; + case CCP_SHA_TYPE_384: + case CCP_SHA_TYPE_512: + memcpy(hmac_buf + block_size, + ctx.address + LSB_ITEM_SIZE + ooffset, + LSB_ITEM_SIZE); + memcpy(hmac_buf + block_size + + (LSB_ITEM_SIZE - ooffset), + ctx.address, + LSB_ITEM_SIZE); + break; + default: + kfree(hmac_buf); + ret = -EINVAL; + goto e_data; + } + + memset(&hmac_cmd, 0, sizeof(hmac_cmd)); + hmac_cmd.engine = CCP_ENGINE_SHA; + hmac_cmd.u.sha.type = sha->type; + hmac_cmd.u.sha.ctx = sha->ctx; + hmac_cmd.u.sha.ctx_len = sha->ctx_len; + hmac_cmd.u.sha.src = &sg; + hmac_cmd.u.sha.src_len = block_size + digest_size; + hmac_cmd.u.sha.opad = NULL; + hmac_cmd.u.sha.opad_len = 0; + hmac_cmd.u.sha.first = 1; + hmac_cmd.u.sha.final = 1; + hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3; + + ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd); + if (ret) + cmd->engine_error = hmac_cmd.engine_error; + + kfree(hmac_buf); + } + +e_data: + if (sha->src) + ccp_free_data(&src, cmd_q); + +e_ctx: + ccp_dm_free(&ctx); + + return ret; +} + +static noinline_for_stack int +ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_rsa_engine *rsa = &cmd->u.rsa; + struct ccp_dm_workarea exp, src, dst; + struct ccp_op op; + unsigned int sb_count, i_len, o_len; + int ret; + + /* Check against the maximum allowable size, in bits */ + if (rsa->key_size > cmd_q->ccp->vdata->rsamax) + return -EINVAL; + + if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst) + return -EINVAL; + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + + /* The RSA modulus must precede the message being acted upon, so + * it must be copied to a DMA area where the message and the + * modulus can be concatenated. Therefore the input buffer + * length required is twice the output buffer length (which + * must be a multiple of 256-bits). Compute o_len, i_len in bytes. + * Buffer sizes must be a multiple of 32 bytes; rounding up may be + * required. + */ + o_len = 32 * ((rsa->key_size + 255) / 256); + i_len = o_len * 2; + + sb_count = 0; + if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) { + /* sb_count is the number of storage block slots required + * for the modulus. + */ + sb_count = o_len / CCP_SB_BYTES; + op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, + sb_count); + if (!op.sb_key) + return -EIO; + } else { + /* A version 5 device allows a modulus size that will not fit + * in the LSB, so the command will transfer it from memory. + * Set the sb key to the default, even though it's not used. + */ + op.sb_key = cmd_q->sb_key; + } + + /* The RSA exponent must be in little endian format. Reverse its + * byte order. + */ + ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE); + if (ret) + goto e_sb; + + ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len); + if (ret) + goto e_exp; + + if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) { + /* Copy the exponent to the local storage block, using + * as many 32-byte blocks as were allocated above. It's + * already little endian, so no further change is required. + */ + ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_exp; + } + } else { + /* The exponent can be retrieved from memory via DMA. */ + op.exp.u.dma.address = exp.dma.address; + op.exp.u.dma.offset = 0; + } + + /* Concatenate the modulus and the message. Both the modulus and + * the operands must be in little endian format. Since the input + * is in big endian format it must be converted. + */ + ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE); + if (ret) + goto e_exp; + + ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len); + if (ret) + goto e_src; + ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len); + if (ret) + goto e_src; + + /* Prepare the output area for the operation */ + ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE); + if (ret) + goto e_src; + + op.soc = 1; + op.src.u.dma.address = src.dma.address; + op.src.u.dma.offset = 0; + op.src.u.dma.length = i_len; + op.dst.u.dma.address = dst.dma.address; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = o_len; + + op.u.rsa.mod_size = rsa->key_size; + op.u.rsa.input_len = i_len; + + ret = cmd_q->ccp->vdata->perform->rsa(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len); + +e_dst: + ccp_dm_free(&dst); + +e_src: + ccp_dm_free(&src); + +e_exp: + ccp_dm_free(&exp); + +e_sb: + if (sb_count) + cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count); + + return ret; +} + +static noinline_for_stack int +ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_passthru_engine *pt = &cmd->u.passthru; + struct ccp_dm_workarea mask; + struct ccp_data src, dst; + struct ccp_op op; + bool in_place = false; + unsigned int i; + int ret = 0; + + if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) + return -EINVAL; + + if (!pt->src || !pt->dst) + return -EINVAL; + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + if (pt->mask_len != CCP_PASSTHRU_MASKSIZE) + return -EINVAL; + if (!pt->mask) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1); + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + /* Load the mask */ + op.sb_key = cmd_q->sb_key; + + ret = ccp_init_dm_workarea(&mask, cmd_q, + CCP_PASSTHRU_SB_COUNT * + CCP_SB_BYTES, + DMA_TO_DEVICE); + if (ret) + return ret; + + ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len); + if (ret) + goto e_mask; + ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_mask; + } + } + + /* Prepare the input and output data workareas. For in-place + * operations we need to set the dma direction to BIDIRECTIONAL + * and copy the src workarea to the dst workarea. + */ + if (sg_virt(pt->src) == sg_virt(pt->dst)) + in_place = true; + + ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len, + CCP_PASSTHRU_MASKSIZE, + in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); + if (ret) + goto e_mask; + + if (in_place) { + dst = src; + } else { + ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len, + CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE); + if (ret) + goto e_src; + } + + /* Send data to the CCP Passthru engine + * Because the CCP engine works on a single source and destination + * dma address at a time, each entry in the source scatterlist + * (after the dma_map_sg call) must be less than or equal to the + * (remaining) length in the destination scatterlist entry and the + * length must be a multiple of CCP_PASSTHRU_BLOCKSIZE + */ + dst.sg_wa.sg_used = 0; + for (i = 1; i <= src.sg_wa.dma_count; i++) { + if (!dst.sg_wa.sg || + (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) { + ret = -EINVAL; + goto e_dst; + } + + if (i == src.sg_wa.dma_count) { + op.eom = 1; + op.soc = 1; + } + + op.src.type = CCP_MEMTYPE_SYSTEM; + op.src.u.dma.address = sg_dma_address(src.sg_wa.sg); + op.src.u.dma.offset = 0; + op.src.u.dma.length = sg_dma_len(src.sg_wa.sg); + + op.dst.type = CCP_MEMTYPE_SYSTEM; + op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg); + op.dst.u.dma.offset = dst.sg_wa.sg_used; + op.dst.u.dma.length = op.src.u.dma.length; + + ret = cmd_q->ccp->vdata->perform->passthru(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg); + if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) { + dst.sg_wa.sg = sg_next(dst.sg_wa.sg); + dst.sg_wa.sg_used = 0; + } + src.sg_wa.sg = sg_next(src.sg_wa.sg); + } + +e_dst: + if (!in_place) + ccp_free_data(&dst, cmd_q); + +e_src: + ccp_free_data(&src, cmd_q); + +e_mask: + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) + ccp_dm_free(&mask); + + return ret; +} + +static noinline_for_stack int +ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q, + struct ccp_cmd *cmd) +{ + struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap; + struct ccp_dm_workarea mask; + struct ccp_op op; + int ret; + + if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1))) + return -EINVAL; + + if (!pt->src_dma || !pt->dst_dma) + return -EINVAL; + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + if (pt->mask_len != CCP_PASSTHRU_MASKSIZE) + return -EINVAL; + if (!pt->mask) + return -EINVAL; + } + + BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1); + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + + if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) { + /* Load the mask */ + op.sb_key = cmd_q->sb_key; + + mask.length = pt->mask_len; + mask.dma.address = pt->mask; + mask.dma.length = pt->mask_len; + + ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key, + CCP_PASSTHRU_BYTESWAP_NOOP); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + return ret; + } + } + + /* Send data to the CCP Passthru engine */ + op.eom = 1; + op.soc = 1; + + op.src.type = CCP_MEMTYPE_SYSTEM; + op.src.u.dma.address = pt->src_dma; + op.src.u.dma.offset = 0; + op.src.u.dma.length = pt->src_len; + + op.dst.type = CCP_MEMTYPE_SYSTEM; + op.dst.u.dma.address = pt->dst_dma; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = pt->src_len; + + ret = cmd_q->ccp->vdata->perform->passthru(&op); + if (ret) + cmd->engine_error = cmd_q->cmd_error; + + return ret; +} + +static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_ecc_engine *ecc = &cmd->u.ecc; + struct ccp_dm_workarea src, dst; + struct ccp_op op; + int ret; + u8 *save; + + if (!ecc->u.mm.operand_1 || + (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) + if (!ecc->u.mm.operand_2 || + (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + if (!ecc->u.mm.result || + (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + + /* Concatenate the modulus and the operands. Both the modulus and + * the operands must be in little endian format. Since the input + * is in big endian format it must be converted and placed in a + * fixed length buffer. + */ + ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE, + DMA_TO_DEVICE); + if (ret) + return ret; + + /* Save the workarea address since it is updated in order to perform + * the concatenation + */ + save = src.address; + + /* Copy the ECC modulus */ + ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + + /* Copy the first operand */ + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0, + ecc->u.mm.operand_1_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + + if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) { + /* Copy the second operand */ + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0, + ecc->u.mm.operand_2_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + } + + /* Restore the workarea address */ + src.address = save; + + /* Prepare the output area for the operation */ + ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE, + DMA_FROM_DEVICE); + if (ret) + goto e_src; + + op.soc = 1; + op.src.u.dma.address = src.dma.address; + op.src.u.dma.offset = 0; + op.src.u.dma.length = src.length; + op.dst.u.dma.address = dst.dma.address; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = dst.length; + + op.u.ecc.function = cmd->u.ecc.function; + + ret = cmd_q->ccp->vdata->perform->ecc(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ecc->ecc_result = le16_to_cpup( + (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET)); + if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) { + ret = -EIO; + goto e_dst; + } + + /* Save the ECC result */ + ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0, + CCP_ECC_MODULUS_BYTES); + +e_dst: + ccp_dm_free(&dst); + +e_src: + ccp_dm_free(&src); + + return ret; +} + +static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_ecc_engine *ecc = &cmd->u.ecc; + struct ccp_dm_workarea src, dst; + struct ccp_op op; + int ret; + u8 *save; + + if (!ecc->u.pm.point_1.x || + (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) || + !ecc->u.pm.point_1.y || + (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { + if (!ecc->u.pm.point_2.x || + (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) || + !ecc->u.pm.point_2.y || + (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + } else { + if (!ecc->u.pm.domain_a || + (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) + if (!ecc->u.pm.scalar || + (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + } + + if (!ecc->u.pm.result.x || + (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) || + !ecc->u.pm.result.y || + (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + memset(&op, 0, sizeof(op)); + op.cmd_q = cmd_q; + op.jobid = CCP_NEW_JOBID(cmd_q->ccp); + + /* Concatenate the modulus and the operands. Both the modulus and + * the operands must be in little endian format. Since the input + * is in big endian format it must be converted and placed in a + * fixed length buffer. + */ + ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE, + DMA_TO_DEVICE); + if (ret) + return ret; + + /* Save the workarea address since it is updated in order to perform + * the concatenation + */ + save = src.address; + + /* Copy the ECC modulus */ + ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + + /* Copy the first point X and Y coordinate */ + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0, + ecc->u.pm.point_1.x_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0, + ecc->u.pm.point_1.y_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + + /* Set the first point Z coordinate to 1 */ + *src.address = 0x01; + src.address += CCP_ECC_OPERAND_SIZE; + + if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) { + /* Copy the second point X and Y coordinate */ + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0, + ecc->u.pm.point_2.x_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0, + ecc->u.pm.point_2.y_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + + /* Set the second point Z coordinate to 1 */ + *src.address = 0x01; + src.address += CCP_ECC_OPERAND_SIZE; + } else { + /* Copy the Domain "a" parameter */ + ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0, + ecc->u.pm.domain_a_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + + if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) { + /* Copy the scalar value */ + ret = ccp_reverse_set_dm_area(&src, 0, + ecc->u.pm.scalar, 0, + ecc->u.pm.scalar_len); + if (ret) + goto e_src; + src.address += CCP_ECC_OPERAND_SIZE; + } + } + + /* Restore the workarea address */ + src.address = save; + + /* Prepare the output area for the operation */ + ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE, + DMA_FROM_DEVICE); + if (ret) + goto e_src; + + op.soc = 1; + op.src.u.dma.address = src.dma.address; + op.src.u.dma.offset = 0; + op.src.u.dma.length = src.length; + op.dst.u.dma.address = dst.dma.address; + op.dst.u.dma.offset = 0; + op.dst.u.dma.length = dst.length; + + op.u.ecc.function = cmd->u.ecc.function; + + ret = cmd_q->ccp->vdata->perform->ecc(&op); + if (ret) { + cmd->engine_error = cmd_q->cmd_error; + goto e_dst; + } + + ecc->ecc_result = le16_to_cpup( + (const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET)); + if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) { + ret = -EIO; + goto e_dst; + } + + /* Save the workarea address since it is updated as we walk through + * to copy the point math result + */ + save = dst.address; + + /* Save the ECC result X and Y coordinates */ + ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0, + CCP_ECC_MODULUS_BYTES); + dst.address += CCP_ECC_OUTPUT_SIZE; + ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0, + CCP_ECC_MODULUS_BYTES); + dst.address += CCP_ECC_OUTPUT_SIZE; + + /* Restore the workarea address */ + dst.address = save; + +e_dst: + ccp_dm_free(&dst); + +e_src: + ccp_dm_free(&src); + + return ret; +} + +static noinline_for_stack int +ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + struct ccp_ecc_engine *ecc = &cmd->u.ecc; + + ecc->ecc_result = 0; + + if (!ecc->mod || + (ecc->mod_len > CCP_ECC_MODULUS_BYTES)) + return -EINVAL; + + switch (ecc->function) { + case CCP_ECC_FUNCTION_MMUL_384BIT: + case CCP_ECC_FUNCTION_MADD_384BIT: + case CCP_ECC_FUNCTION_MINV_384BIT: + return ccp_run_ecc_mm_cmd(cmd_q, cmd); + + case CCP_ECC_FUNCTION_PADD_384BIT: + case CCP_ECC_FUNCTION_PMUL_384BIT: + case CCP_ECC_FUNCTION_PDBL_384BIT: + return ccp_run_ecc_pm_cmd(cmd_q, cmd); + + default: + return -EINVAL; + } +} + +int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) +{ + int ret; + + cmd->engine_error = 0; + cmd_q->cmd_error = 0; + cmd_q->int_rcvd = 0; + cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q); + + switch (cmd->engine) { + case CCP_ENGINE_AES: + switch (cmd->u.aes.mode) { + case CCP_AES_MODE_CMAC: + ret = ccp_run_aes_cmac_cmd(cmd_q, cmd); + break; + case CCP_AES_MODE_GCM: + ret = ccp_run_aes_gcm_cmd(cmd_q, cmd); + break; + default: + ret = ccp_run_aes_cmd(cmd_q, cmd); + break; + } + break; + case CCP_ENGINE_XTS_AES_128: + ret = ccp_run_xts_aes_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_DES3: + ret = ccp_run_des3_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_SHA: + ret = ccp_run_sha_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_RSA: + ret = ccp_run_rsa_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_PASSTHRU: + if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP) + ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd); + else + ret = ccp_run_passthru_cmd(cmd_q, cmd); + break; + case CCP_ENGINE_ECC: + ret = ccp_run_ecc_cmd(cmd_q, cmd); + break; + default: + ret = -EINVAL; + } + + return ret; +} diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c new file mode 100644 index 000000000..4bf9eaab4 --- /dev/null +++ b/drivers/crypto/ccp/psp-dev.c @@ -0,0 +1,270 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Platform Security Processor (PSP) interface + * + * Copyright (C) 2016,2019 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#include <linux/kernel.h> +#include <linux/irqreturn.h> + +#include "sp-dev.h" +#include "psp-dev.h" +#include "sev-dev.h" +#include "tee-dev.h" + +struct psp_device *psp_master; + +static struct psp_device *psp_alloc_struct(struct sp_device *sp) +{ + struct device *dev = sp->dev; + struct psp_device *psp; + + psp = devm_kzalloc(dev, sizeof(*psp), GFP_KERNEL); + if (!psp) + return NULL; + + psp->dev = dev; + psp->sp = sp; + + snprintf(psp->name, sizeof(psp->name), "psp-%u", sp->ord); + + return psp; +} + +static irqreturn_t psp_irq_handler(int irq, void *data) +{ + struct psp_device *psp = data; + unsigned int status; + + /* Read the interrupt status: */ + status = ioread32(psp->io_regs + psp->vdata->intsts_reg); + + /* Clear the interrupt status by writing the same value we read. */ + iowrite32(status, psp->io_regs + psp->vdata->intsts_reg); + + /* invoke subdevice interrupt handlers */ + if (status) { + if (psp->sev_irq_handler) + psp->sev_irq_handler(irq, psp->sev_irq_data, status); + + if (psp->tee_irq_handler) + psp->tee_irq_handler(irq, psp->tee_irq_data, status); + } + + return IRQ_HANDLED; +} + +static unsigned int psp_get_capability(struct psp_device *psp) +{ + unsigned int val = ioread32(psp->io_regs + psp->vdata->feature_reg); + + /* + * Check for a access to the registers. If this read returns + * 0xffffffff, it's likely that the system is running a broken + * BIOS which disallows access to the device. Stop here and + * fail the PSP initialization (but not the load, as the CCP + * could get properly initialized). + */ + if (val == 0xffffffff) { + dev_notice(psp->dev, "psp: unable to access the device: you might be running a broken BIOS.\n"); + return 0; + } + + return val; +} + +static int psp_check_sev_support(struct psp_device *psp, + unsigned int capability) +{ + /* Check if device supports SEV feature */ + if (!(capability & 1)) { + dev_dbg(psp->dev, "psp does not support SEV\n"); + return -ENODEV; + } + + return 0; +} + +static int psp_check_tee_support(struct psp_device *psp, + unsigned int capability) +{ + /* Check if device supports TEE feature */ + if (!(capability & 2)) { + dev_dbg(psp->dev, "psp does not support TEE\n"); + return -ENODEV; + } + + return 0; +} + +static int psp_check_support(struct psp_device *psp, + unsigned int capability) +{ + int sev_support = psp_check_sev_support(psp, capability); + int tee_support = psp_check_tee_support(psp, capability); + + /* Return error if device neither supports SEV nor TEE */ + if (sev_support && tee_support) + return -ENODEV; + + return 0; +} + +static int psp_init(struct psp_device *psp, unsigned int capability) +{ + int ret; + + if (!psp_check_sev_support(psp, capability)) { + ret = sev_dev_init(psp); + if (ret) + return ret; + } + + if (!psp_check_tee_support(psp, capability)) { + ret = tee_dev_init(psp); + if (ret) + return ret; + } + + return 0; +} + +int psp_dev_init(struct sp_device *sp) +{ + struct device *dev = sp->dev; + struct psp_device *psp; + unsigned int capability; + int ret; + + ret = -ENOMEM; + psp = psp_alloc_struct(sp); + if (!psp) + goto e_err; + + sp->psp_data = psp; + + psp->vdata = (struct psp_vdata *)sp->dev_vdata->psp_vdata; + if (!psp->vdata) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } + + psp->io_regs = sp->io_map; + + capability = psp_get_capability(psp); + if (!capability) + goto e_disable; + + ret = psp_check_support(psp, capability); + if (ret) + goto e_disable; + + /* Disable and clear interrupts until ready */ + iowrite32(0, psp->io_regs + psp->vdata->inten_reg); + iowrite32(-1, psp->io_regs + psp->vdata->intsts_reg); + + /* Request an irq */ + ret = sp_request_psp_irq(psp->sp, psp_irq_handler, psp->name, psp); + if (ret) { + dev_err(dev, "psp: unable to allocate an IRQ\n"); + goto e_err; + } + + ret = psp_init(psp, capability); + if (ret) + goto e_irq; + + if (sp->set_psp_master_device) + sp->set_psp_master_device(sp); + + /* Enable interrupt */ + iowrite32(-1, psp->io_regs + psp->vdata->inten_reg); + + dev_notice(dev, "psp enabled\n"); + + return 0; + +e_irq: + sp_free_psp_irq(psp->sp, psp); +e_err: + sp->psp_data = NULL; + + dev_notice(dev, "psp initialization failed\n"); + + return ret; + +e_disable: + sp->psp_data = NULL; + + return ret; +} + +void psp_dev_destroy(struct sp_device *sp) +{ + struct psp_device *psp = sp->psp_data; + + if (!psp) + return; + + sev_dev_destroy(psp); + + tee_dev_destroy(psp); + + sp_free_psp_irq(sp, psp); + + if (sp->clear_psp_master_device) + sp->clear_psp_master_device(sp); +} + +void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, + void *data) +{ + psp->sev_irq_data = data; + psp->sev_irq_handler = handler; +} + +void psp_clear_sev_irq_handler(struct psp_device *psp) +{ + psp_set_sev_irq_handler(psp, NULL, NULL); +} + +void psp_set_tee_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, + void *data) +{ + psp->tee_irq_data = data; + psp->tee_irq_handler = handler; +} + +void psp_clear_tee_irq_handler(struct psp_device *psp) +{ + psp_set_tee_irq_handler(psp, NULL, NULL); +} + +struct psp_device *psp_get_master_device(void) +{ + struct sp_device *sp = sp_get_psp_master_device(); + + return sp ? sp->psp_data : NULL; +} + +void psp_pci_init(void) +{ + psp_master = psp_get_master_device(); + + if (!psp_master) + return; + + sev_pci_init(); +} + +void psp_pci_exit(void) +{ + if (!psp_master) + return; + + sev_pci_exit(); +} diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h new file mode 100644 index 000000000..ef38e4135 --- /dev/null +++ b/drivers/crypto/ccp/psp-dev.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Platform Security Processor (PSP) interface driver + * + * Copyright (C) 2017-2019 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#ifndef __PSP_DEV_H__ +#define __PSP_DEV_H__ + +#include <linux/device.h> +#include <linux/list.h> +#include <linux/bits.h> +#include <linux/interrupt.h> + +#include "sp-dev.h" + +#define PSP_CMDRESP_RESP BIT(31) +#define PSP_CMDRESP_ERR_MASK 0xffff + +#define MAX_PSP_NAME_LEN 16 + +extern struct psp_device *psp_master; + +typedef void (*psp_irq_handler_t)(int, void *, unsigned int); + +struct psp_device { + struct list_head entry; + + struct psp_vdata *vdata; + char name[MAX_PSP_NAME_LEN]; + + struct device *dev; + struct sp_device *sp; + + void __iomem *io_regs; + + psp_irq_handler_t sev_irq_handler; + void *sev_irq_data; + + psp_irq_handler_t tee_irq_handler; + void *tee_irq_data; + + void *sev_data; + void *tee_data; +}; + +void psp_set_sev_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, + void *data); +void psp_clear_sev_irq_handler(struct psp_device *psp); + +void psp_set_tee_irq_handler(struct psp_device *psp, psp_irq_handler_t handler, + void *data); +void psp_clear_tee_irq_handler(struct psp_device *psp); + +struct psp_device *psp_get_master_device(void); + +#endif /* __PSP_DEV_H */ diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c new file mode 100644 index 000000000..8e2672ec6 --- /dev/null +++ b/drivers/crypto/ccp/sev-dev.c @@ -0,0 +1,1137 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure Encrypted Virtualization (SEV) interface + * + * Copyright (C) 2016,2019 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/spinlock_types.h> +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/hw_random.h> +#include <linux/ccp.h> +#include <linux/firmware.h> +#include <linux/gfp.h> + +#include <asm/smp.h> +#include <asm/cacheflush.h> + +#include "psp-dev.h" +#include "sev-dev.h" + +#define DEVICE_NAME "sev" +#define SEV_FW_FILE "amd/sev.fw" +#define SEV_FW_NAME_SIZE 64 + +static DEFINE_MUTEX(sev_cmd_mutex); +static struct sev_misc_dev *misc_dev; + +static int psp_cmd_timeout = 100; +module_param(psp_cmd_timeout, int, 0644); +MODULE_PARM_DESC(psp_cmd_timeout, " default timeout value, in seconds, for PSP commands"); + +static int psp_probe_timeout = 5; +module_param(psp_probe_timeout, int, 0644); +MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe"); + +MODULE_FIRMWARE("amd/amd_sev_fam17h_model0xh.sbin"); /* 1st gen EPYC */ +MODULE_FIRMWARE("amd/amd_sev_fam17h_model3xh.sbin"); /* 2nd gen EPYC */ +MODULE_FIRMWARE("amd/amd_sev_fam19h_model0xh.sbin"); /* 3rd gen EPYC */ + +static bool psp_dead; +static int psp_timeout; + +/* Trusted Memory Region (TMR): + * The TMR is a 1MB area that must be 1MB aligned. Use the page allocator + * to allocate the memory, which will return aligned memory for the specified + * allocation order. + */ +#define SEV_ES_TMR_SIZE (1024 * 1024) +static void *sev_es_tmr; + +static inline bool sev_version_greater_or_equal(u8 maj, u8 min) +{ + struct sev_device *sev = psp_master->sev_data; + + if (sev->api_major > maj) + return true; + + if (sev->api_major == maj && sev->api_minor >= min) + return true; + + return false; +} + +static void sev_irq_handler(int irq, void *data, unsigned int status) +{ + struct sev_device *sev = data; + int reg; + + /* Check if it is command completion: */ + if (!(status & SEV_CMD_COMPLETE)) + return; + + /* Check if it is SEV command completion: */ + reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg); + if (reg & PSP_CMDRESP_RESP) { + sev->int_rcvd = 1; + wake_up(&sev->int_queue); + } +} + +static int sev_wait_cmd_ioc(struct sev_device *sev, + unsigned int *reg, unsigned int timeout) +{ + int ret; + + ret = wait_event_timeout(sev->int_queue, + sev->int_rcvd, timeout * HZ); + if (!ret) + return -ETIMEDOUT; + + *reg = ioread32(sev->io_regs + sev->vdata->cmdresp_reg); + + return 0; +} + +static int sev_cmd_buffer_len(int cmd) +{ + switch (cmd) { + case SEV_CMD_INIT: return sizeof(struct sev_data_init); + case SEV_CMD_PLATFORM_STATUS: return sizeof(struct sev_user_data_status); + case SEV_CMD_PEK_CSR: return sizeof(struct sev_data_pek_csr); + case SEV_CMD_PEK_CERT_IMPORT: return sizeof(struct sev_data_pek_cert_import); + case SEV_CMD_PDH_CERT_EXPORT: return sizeof(struct sev_data_pdh_cert_export); + case SEV_CMD_LAUNCH_START: return sizeof(struct sev_data_launch_start); + case SEV_CMD_LAUNCH_UPDATE_DATA: return sizeof(struct sev_data_launch_update_data); + case SEV_CMD_LAUNCH_UPDATE_VMSA: return sizeof(struct sev_data_launch_update_vmsa); + case SEV_CMD_LAUNCH_FINISH: return sizeof(struct sev_data_launch_finish); + case SEV_CMD_LAUNCH_MEASURE: return sizeof(struct sev_data_launch_measure); + case SEV_CMD_ACTIVATE: return sizeof(struct sev_data_activate); + case SEV_CMD_DEACTIVATE: return sizeof(struct sev_data_deactivate); + case SEV_CMD_DECOMMISSION: return sizeof(struct sev_data_decommission); + case SEV_CMD_GUEST_STATUS: return sizeof(struct sev_data_guest_status); + case SEV_CMD_DBG_DECRYPT: return sizeof(struct sev_data_dbg); + case SEV_CMD_DBG_ENCRYPT: return sizeof(struct sev_data_dbg); + case SEV_CMD_SEND_START: return sizeof(struct sev_data_send_start); + case SEV_CMD_SEND_UPDATE_DATA: return sizeof(struct sev_data_send_update_data); + case SEV_CMD_SEND_UPDATE_VMSA: return sizeof(struct sev_data_send_update_vmsa); + case SEV_CMD_SEND_FINISH: return sizeof(struct sev_data_send_finish); + case SEV_CMD_RECEIVE_START: return sizeof(struct sev_data_receive_start); + case SEV_CMD_RECEIVE_FINISH: return sizeof(struct sev_data_receive_finish); + case SEV_CMD_RECEIVE_UPDATE_DATA: return sizeof(struct sev_data_receive_update_data); + case SEV_CMD_RECEIVE_UPDATE_VMSA: return sizeof(struct sev_data_receive_update_vmsa); + case SEV_CMD_LAUNCH_UPDATE_SECRET: return sizeof(struct sev_data_launch_secret); + case SEV_CMD_DOWNLOAD_FIRMWARE: return sizeof(struct sev_data_download_firmware); + case SEV_CMD_GET_ID: return sizeof(struct sev_data_get_id); + default: return 0; + } + + return 0; +} + +static void *sev_fw_alloc(unsigned long len) +{ + struct page *page; + + page = alloc_pages(GFP_KERNEL, get_order(len)); + if (!page) + return NULL; + + return page_address(page); +} + +static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret) +{ + struct psp_device *psp = psp_master; + struct sev_device *sev; + unsigned int phys_lsb, phys_msb; + unsigned int reg, ret = 0; + int buf_len; + + if (!psp || !psp->sev_data) + return -ENODEV; + + if (psp_dead) + return -EBUSY; + + sev = psp->sev_data; + + buf_len = sev_cmd_buffer_len(cmd); + if (WARN_ON_ONCE(!data != !buf_len)) + return -EINVAL; + + /* + * Copy the incoming data to driver's scratch buffer as __pa() will not + * work for some memory, e.g. vmalloc'd addresses, and @data may not be + * physically contiguous. + */ + if (data) + memcpy(sev->cmd_buf, data, buf_len); + + /* Get the physical address of the command buffer */ + phys_lsb = data ? lower_32_bits(__psp_pa(sev->cmd_buf)) : 0; + phys_msb = data ? upper_32_bits(__psp_pa(sev->cmd_buf)) : 0; + + dev_dbg(sev->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n", + cmd, phys_msb, phys_lsb, psp_timeout); + + print_hex_dump_debug("(in): ", DUMP_PREFIX_OFFSET, 16, 2, data, + buf_len, false); + + iowrite32(phys_lsb, sev->io_regs + sev->vdata->cmdbuff_addr_lo_reg); + iowrite32(phys_msb, sev->io_regs + sev->vdata->cmdbuff_addr_hi_reg); + + sev->int_rcvd = 0; + + reg = cmd; + reg <<= SEV_CMDRESP_CMD_SHIFT; + reg |= SEV_CMDRESP_IOC; + iowrite32(reg, sev->io_regs + sev->vdata->cmdresp_reg); + + /* wait for command completion */ + ret = sev_wait_cmd_ioc(sev, ®, psp_timeout); + if (ret) { + if (psp_ret) + *psp_ret = 0; + + dev_err(sev->dev, "sev command %#x timed out, disabling PSP\n", cmd); + psp_dead = true; + + return ret; + } + + psp_timeout = psp_cmd_timeout; + + if (psp_ret) + *psp_ret = reg & PSP_CMDRESP_ERR_MASK; + + if (reg & PSP_CMDRESP_ERR_MASK) { + dev_dbg(sev->dev, "sev command %#x failed (%#010x)\n", + cmd, reg & PSP_CMDRESP_ERR_MASK); + ret = -EIO; + } + + print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data, + buf_len, false); + + /* + * Copy potential output from the PSP back to data. Do this even on + * failure in case the caller wants to glean something from the error. + */ + if (data) + memcpy(data, sev->cmd_buf, buf_len); + + return ret; +} + +static int sev_do_cmd(int cmd, void *data, int *psp_ret) +{ + int rc; + + mutex_lock(&sev_cmd_mutex); + rc = __sev_do_cmd_locked(cmd, data, psp_ret); + mutex_unlock(&sev_cmd_mutex); + + return rc; +} + +static int __sev_platform_init_locked(int *error) +{ + struct psp_device *psp = psp_master; + struct sev_device *sev; + int rc = 0; + + if (!psp || !psp->sev_data) + return -ENODEV; + + sev = psp->sev_data; + + if (sev->state == SEV_STATE_INIT) + return 0; + + if (sev_es_tmr) { + u64 tmr_pa; + + /* + * Do not include the encryption mask on the physical + * address of the TMR (firmware should clear it anyway). + */ + tmr_pa = __pa(sev_es_tmr); + + sev->init_cmd_buf.flags |= SEV_INIT_FLAGS_SEV_ES; + sev->init_cmd_buf.tmr_address = tmr_pa; + sev->init_cmd_buf.tmr_len = SEV_ES_TMR_SIZE; + } + + rc = __sev_do_cmd_locked(SEV_CMD_INIT, &sev->init_cmd_buf, error); + if (rc) + return rc; + + sev->state = SEV_STATE_INIT; + + /* Prepare for first SEV guest launch after INIT */ + wbinvd_on_all_cpus(); + rc = __sev_do_cmd_locked(SEV_CMD_DF_FLUSH, NULL, error); + if (rc) + return rc; + + dev_dbg(sev->dev, "SEV firmware initialized\n"); + + return rc; +} + +int sev_platform_init(int *error) +{ + int rc; + + mutex_lock(&sev_cmd_mutex); + rc = __sev_platform_init_locked(error); + mutex_unlock(&sev_cmd_mutex); + + return rc; +} +EXPORT_SYMBOL_GPL(sev_platform_init); + +static int __sev_platform_shutdown_locked(int *error) +{ + struct sev_device *sev = psp_master->sev_data; + int ret; + + if (!sev || sev->state == SEV_STATE_UNINIT) + return 0; + + ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); + if (ret) + return ret; + + sev->state = SEV_STATE_UNINIT; + dev_dbg(sev->dev, "SEV firmware shutdown\n"); + + return ret; +} + +static int sev_platform_shutdown(int *error) +{ + int rc; + + mutex_lock(&sev_cmd_mutex); + rc = __sev_platform_shutdown_locked(NULL); + mutex_unlock(&sev_cmd_mutex); + + return rc; +} + +static int sev_get_platform_state(int *state, int *error) +{ + struct sev_user_data_status data; + int rc; + + rc = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, error); + if (rc) + return rc; + + *state = data.state; + return rc; +} + +static int sev_ioctl_do_reset(struct sev_issue_cmd *argp, bool writable) +{ + int state, rc; + + if (!writable) + return -EPERM; + + /* + * The SEV spec requires that FACTORY_RESET must be issued in + * UNINIT state. Before we go further lets check if any guest is + * active. + * + * If FW is in WORKING state then deny the request otherwise issue + * SHUTDOWN command do INIT -> UNINIT before issuing the FACTORY_RESET. + * + */ + rc = sev_get_platform_state(&state, &argp->error); + if (rc) + return rc; + + if (state == SEV_STATE_WORKING) + return -EBUSY; + + if (state == SEV_STATE_INIT) { + rc = __sev_platform_shutdown_locked(&argp->error); + if (rc) + return rc; + } + + return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error); +} + +static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) +{ + struct sev_user_data_status data; + int ret; + + memset(&data, 0, sizeof(data)); + + ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, &argp->error); + if (ret) + return ret; + + if (copy_to_user((void __user *)argp->data, &data, sizeof(data))) + ret = -EFAULT; + + return ret; +} + +static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp, bool writable) +{ + struct sev_device *sev = psp_master->sev_data; + int rc; + + if (!writable) + return -EPERM; + + if (sev->state == SEV_STATE_UNINIT) { + rc = __sev_platform_init_locked(&argp->error); + if (rc) + return rc; + } + + return __sev_do_cmd_locked(cmd, NULL, &argp->error); +} + +static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) +{ + struct sev_device *sev = psp_master->sev_data; + struct sev_user_data_pek_csr input; + struct sev_data_pek_csr data; + void __user *input_address; + void *blob = NULL; + int ret; + + if (!writable) + return -EPERM; + + if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) + return -EFAULT; + + memset(&data, 0, sizeof(data)); + + /* userspace wants to query CSR length */ + if (!input.address || !input.length) + goto cmd; + + /* allocate a physically contiguous buffer to store the CSR blob */ + input_address = (void __user *)input.address; + if (input.length > SEV_FW_BLOB_MAX_SIZE) + return -EFAULT; + + blob = kzalloc(input.length, GFP_KERNEL); + if (!blob) + return -ENOMEM; + + data.address = __psp_pa(blob); + data.len = input.length; + +cmd: + if (sev->state == SEV_STATE_UNINIT) { + ret = __sev_platform_init_locked(&argp->error); + if (ret) + goto e_free_blob; + } + + ret = __sev_do_cmd_locked(SEV_CMD_PEK_CSR, &data, &argp->error); + + /* If we query the CSR length, FW responded with expected data. */ + input.length = data.len; + + if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) { + ret = -EFAULT; + goto e_free_blob; + } + + if (blob) { + if (copy_to_user(input_address, blob, input.length)) + ret = -EFAULT; + } + +e_free_blob: + kfree(blob); + return ret; +} + +void *psp_copy_user_blob(u64 uaddr, u32 len) +{ + if (!uaddr || !len) + return ERR_PTR(-EINVAL); + + /* verify that blob length does not exceed our limit */ + if (len > SEV_FW_BLOB_MAX_SIZE) + return ERR_PTR(-EINVAL); + + return memdup_user((void __user *)uaddr, len); +} +EXPORT_SYMBOL_GPL(psp_copy_user_blob); + +static int sev_get_api_version(void) +{ + struct sev_device *sev = psp_master->sev_data; + struct sev_user_data_status status; + int error = 0, ret; + + ret = sev_platform_status(&status, &error); + if (ret) { + dev_err(sev->dev, + "SEV: failed to get status. Error: %#x\n", error); + return 1; + } + + sev->api_major = status.api_major; + sev->api_minor = status.api_minor; + sev->build = status.build; + sev->state = status.state; + + return 0; +} + +static int sev_get_firmware(struct device *dev, + const struct firmware **firmware) +{ + char fw_name_specific[SEV_FW_NAME_SIZE]; + char fw_name_subset[SEV_FW_NAME_SIZE]; + + snprintf(fw_name_specific, sizeof(fw_name_specific), + "amd/amd_sev_fam%.2xh_model%.2xh.sbin", + boot_cpu_data.x86, boot_cpu_data.x86_model); + + snprintf(fw_name_subset, sizeof(fw_name_subset), + "amd/amd_sev_fam%.2xh_model%.1xxh.sbin", + boot_cpu_data.x86, (boot_cpu_data.x86_model & 0xf0) >> 4); + + /* Check for SEV FW for a particular model. + * Ex. amd_sev_fam17h_model00h.sbin for Family 17h Model 00h + * + * or + * + * Check for SEV FW common to a subset of models. + * Ex. amd_sev_fam17h_model0xh.sbin for + * Family 17h Model 00h -- Family 17h Model 0Fh + * + * or + * + * Fall-back to using generic name: sev.fw + */ + if ((firmware_request_nowarn(firmware, fw_name_specific, dev) >= 0) || + (firmware_request_nowarn(firmware, fw_name_subset, dev) >= 0) || + (firmware_request_nowarn(firmware, SEV_FW_FILE, dev) >= 0)) + return 0; + + return -ENOENT; +} + +/* Don't fail if SEV FW couldn't be updated. Continue with existing SEV FW */ +static int sev_update_firmware(struct device *dev) +{ + struct sev_data_download_firmware *data; + const struct firmware *firmware; + int ret, error, order; + struct page *p; + u64 data_size; + + if (sev_get_firmware(dev, &firmware) == -ENOENT) { + dev_dbg(dev, "No SEV firmware file present\n"); + return -1; + } + + /* + * SEV FW expects the physical address given to it to be 32 + * byte aligned. Memory allocated has structure placed at the + * beginning followed by the firmware being passed to the SEV + * FW. Allocate enough memory for data structure + alignment + * padding + SEV FW. + */ + data_size = ALIGN(sizeof(struct sev_data_download_firmware), 32); + + order = get_order(firmware->size + data_size); + p = alloc_pages(GFP_KERNEL, order); + if (!p) { + ret = -1; + goto fw_err; + } + + /* + * Copy firmware data to a kernel allocated contiguous + * memory region. + */ + data = page_address(p); + memcpy(page_address(p) + data_size, firmware->data, firmware->size); + + data->address = __psp_pa(page_address(p) + data_size); + data->len = firmware->size; + + ret = sev_do_cmd(SEV_CMD_DOWNLOAD_FIRMWARE, data, &error); + if (ret) + dev_dbg(dev, "Failed to update SEV firmware: %#x\n", error); + else + dev_info(dev, "SEV firmware update successful\n"); + + __free_pages(p, order); + +fw_err: + release_firmware(firmware); + + return ret; +} + +static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp, bool writable) +{ + struct sev_device *sev = psp_master->sev_data; + struct sev_user_data_pek_cert_import input; + struct sev_data_pek_cert_import data; + void *pek_blob, *oca_blob; + int ret; + + if (!writable) + return -EPERM; + + if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) + return -EFAULT; + + /* copy PEK certificate blobs from userspace */ + pek_blob = psp_copy_user_blob(input.pek_cert_address, input.pek_cert_len); + if (IS_ERR(pek_blob)) + return PTR_ERR(pek_blob); + + data.reserved = 0; + data.pek_cert_address = __psp_pa(pek_blob); + data.pek_cert_len = input.pek_cert_len; + + /* copy PEK certificate blobs from userspace */ + oca_blob = psp_copy_user_blob(input.oca_cert_address, input.oca_cert_len); + if (IS_ERR(oca_blob)) { + ret = PTR_ERR(oca_blob); + goto e_free_pek; + } + + data.oca_cert_address = __psp_pa(oca_blob); + data.oca_cert_len = input.oca_cert_len; + + /* If platform is not in INIT state then transition it to INIT */ + if (sev->state != SEV_STATE_INIT) { + ret = __sev_platform_init_locked(&argp->error); + if (ret) + goto e_free_oca; + } + + ret = __sev_do_cmd_locked(SEV_CMD_PEK_CERT_IMPORT, &data, &argp->error); + +e_free_oca: + kfree(oca_blob); +e_free_pek: + kfree(pek_blob); + return ret; +} + +static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) +{ + struct sev_user_data_get_id2 input; + struct sev_data_get_id data; + void __user *input_address; + void *id_blob = NULL; + int ret; + + /* SEV GET_ID is available from SEV API v0.16 and up */ + if (!sev_version_greater_or_equal(0, 16)) + return -ENOTSUPP; + + if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) + return -EFAULT; + + input_address = (void __user *)input.address; + + if (input.address && input.length) { + /* + * The length of the ID shouldn't be assumed by software since + * it may change in the future. The allocation size is limited + * to 1 << (PAGE_SHIFT + MAX_ORDER - 1) by the page allocator. + * If the allocation fails, simply return ENOMEM rather than + * warning in the kernel log. + */ + id_blob = kzalloc(input.length, GFP_KERNEL | __GFP_NOWARN); + if (!id_blob) + return -ENOMEM; + + data.address = __psp_pa(id_blob); + data.len = input.length; + } else { + data.address = 0; + data.len = 0; + } + + ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, &data, &argp->error); + + /* + * Firmware will return the length of the ID value (either the minimum + * required length or the actual length written), return it to the user. + */ + input.length = data.len; + + if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) { + ret = -EFAULT; + goto e_free; + } + + if (id_blob) { + if (copy_to_user(input_address, id_blob, data.len)) { + ret = -EFAULT; + goto e_free; + } + } + +e_free: + kfree(id_blob); + + return ret; +} + +static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp) +{ + struct sev_data_get_id *data; + u64 data_size, user_size; + void *id_blob, *mem; + int ret; + + /* SEV GET_ID available from SEV API v0.16 and up */ + if (!sev_version_greater_or_equal(0, 16)) + return -ENOTSUPP; + + /* SEV FW expects the buffer it fills with the ID to be + * 8-byte aligned. Memory allocated should be enough to + * hold data structure + alignment padding + memory + * where SEV FW writes the ID. + */ + data_size = ALIGN(sizeof(struct sev_data_get_id), 8); + user_size = sizeof(struct sev_user_data_get_id); + + mem = kzalloc(data_size + user_size, GFP_KERNEL); + if (!mem) + return -ENOMEM; + + data = mem; + id_blob = mem + data_size; + + data->address = __psp_pa(id_blob); + data->len = user_size; + + ret = __sev_do_cmd_locked(SEV_CMD_GET_ID, data, &argp->error); + if (!ret) { + if (copy_to_user((void __user *)argp->data, id_blob, data->len)) + ret = -EFAULT; + } + + kfree(mem); + + return ret; +} + +static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) +{ + struct sev_device *sev = psp_master->sev_data; + struct sev_user_data_pdh_cert_export input; + void *pdh_blob = NULL, *cert_blob = NULL; + struct sev_data_pdh_cert_export data; + void __user *input_cert_chain_address; + void __user *input_pdh_cert_address; + int ret; + + /* If platform is not in INIT state then transition it to INIT. */ + if (sev->state != SEV_STATE_INIT) { + if (!writable) + return -EPERM; + + ret = __sev_platform_init_locked(&argp->error); + if (ret) + return ret; + } + + if (copy_from_user(&input, (void __user *)argp->data, sizeof(input))) + return -EFAULT; + + memset(&data, 0, sizeof(data)); + + /* Userspace wants to query the certificate length. */ + if (!input.pdh_cert_address || + !input.pdh_cert_len || + !input.cert_chain_address) + goto cmd; + + input_pdh_cert_address = (void __user *)input.pdh_cert_address; + input_cert_chain_address = (void __user *)input.cert_chain_address; + + /* Allocate a physically contiguous buffer to store the PDH blob. */ + if (input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) + return -EFAULT; + + /* Allocate a physically contiguous buffer to store the cert chain blob. */ + if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE) + return -EFAULT; + + pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL); + if (!pdh_blob) + return -ENOMEM; + + data.pdh_cert_address = __psp_pa(pdh_blob); + data.pdh_cert_len = input.pdh_cert_len; + + cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL); + if (!cert_blob) { + ret = -ENOMEM; + goto e_free_pdh; + } + + data.cert_chain_address = __psp_pa(cert_blob); + data.cert_chain_len = input.cert_chain_len; + +cmd: + ret = __sev_do_cmd_locked(SEV_CMD_PDH_CERT_EXPORT, &data, &argp->error); + + /* If we query the length, FW responded with expected data. */ + input.cert_chain_len = data.cert_chain_len; + input.pdh_cert_len = data.pdh_cert_len; + + if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) { + ret = -EFAULT; + goto e_free_cert; + } + + if (pdh_blob) { + if (copy_to_user(input_pdh_cert_address, + pdh_blob, input.pdh_cert_len)) { + ret = -EFAULT; + goto e_free_cert; + } + } + + if (cert_blob) { + if (copy_to_user(input_cert_chain_address, + cert_blob, input.cert_chain_len)) + ret = -EFAULT; + } + +e_free_cert: + kfree(cert_blob); +e_free_pdh: + kfree(pdh_blob); + return ret; +} + +static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct sev_issue_cmd input; + int ret = -EFAULT; + bool writable = file->f_mode & FMODE_WRITE; + + if (!psp_master || !psp_master->sev_data) + return -ENODEV; + + if (ioctl != SEV_ISSUE_CMD) + return -EINVAL; + + if (copy_from_user(&input, argp, sizeof(struct sev_issue_cmd))) + return -EFAULT; + + if (input.cmd > SEV_MAX) + return -EINVAL; + + mutex_lock(&sev_cmd_mutex); + + switch (input.cmd) { + + case SEV_FACTORY_RESET: + ret = sev_ioctl_do_reset(&input, writable); + break; + case SEV_PLATFORM_STATUS: + ret = sev_ioctl_do_platform_status(&input); + break; + case SEV_PEK_GEN: + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &input, writable); + break; + case SEV_PDH_GEN: + ret = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &input, writable); + break; + case SEV_PEK_CSR: + ret = sev_ioctl_do_pek_csr(&input, writable); + break; + case SEV_PEK_CERT_IMPORT: + ret = sev_ioctl_do_pek_import(&input, writable); + break; + case SEV_PDH_CERT_EXPORT: + ret = sev_ioctl_do_pdh_export(&input, writable); + break; + case SEV_GET_ID: + pr_warn_once("SEV_GET_ID command is deprecated, use SEV_GET_ID2\n"); + ret = sev_ioctl_do_get_id(&input); + break; + case SEV_GET_ID2: + ret = sev_ioctl_do_get_id2(&input); + break; + default: + ret = -EINVAL; + goto out; + } + + if (copy_to_user(argp, &input, sizeof(struct sev_issue_cmd))) + ret = -EFAULT; +out: + mutex_unlock(&sev_cmd_mutex); + + return ret; +} + +static const struct file_operations sev_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = sev_ioctl, +}; + +int sev_platform_status(struct sev_user_data_status *data, int *error) +{ + return sev_do_cmd(SEV_CMD_PLATFORM_STATUS, data, error); +} +EXPORT_SYMBOL_GPL(sev_platform_status); + +int sev_guest_deactivate(struct sev_data_deactivate *data, int *error) +{ + return sev_do_cmd(SEV_CMD_DEACTIVATE, data, error); +} +EXPORT_SYMBOL_GPL(sev_guest_deactivate); + +int sev_guest_activate(struct sev_data_activate *data, int *error) +{ + return sev_do_cmd(SEV_CMD_ACTIVATE, data, error); +} +EXPORT_SYMBOL_GPL(sev_guest_activate); + +int sev_guest_decommission(struct sev_data_decommission *data, int *error) +{ + return sev_do_cmd(SEV_CMD_DECOMMISSION, data, error); +} +EXPORT_SYMBOL_GPL(sev_guest_decommission); + +int sev_guest_df_flush(int *error) +{ + return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error); +} +EXPORT_SYMBOL_GPL(sev_guest_df_flush); + +static void sev_exit(struct kref *ref) +{ + misc_deregister(&misc_dev->misc); + kfree(misc_dev); + misc_dev = NULL; +} + +static int sev_misc_init(struct sev_device *sev) +{ + struct device *dev = sev->dev; + int ret; + + /* + * SEV feature support can be detected on multiple devices but the SEV + * FW commands must be issued on the master. During probe, we do not + * know the master hence we create /dev/sev on the first device probe. + * sev_do_cmd() finds the right master device to which to issue the + * command to the firmware. + */ + if (!misc_dev) { + struct miscdevice *misc; + + misc_dev = kzalloc(sizeof(*misc_dev), GFP_KERNEL); + if (!misc_dev) + return -ENOMEM; + + misc = &misc_dev->misc; + misc->minor = MISC_DYNAMIC_MINOR; + misc->name = DEVICE_NAME; + misc->fops = &sev_fops; + + ret = misc_register(misc); + if (ret) + return ret; + + kref_init(&misc_dev->refcount); + } else { + kref_get(&misc_dev->refcount); + } + + init_waitqueue_head(&sev->int_queue); + sev->misc = misc_dev; + dev_dbg(dev, "registered SEV device\n"); + + return 0; +} + +int sev_dev_init(struct psp_device *psp) +{ + struct device *dev = psp->dev; + struct sev_device *sev; + int ret = -ENOMEM; + + sev = devm_kzalloc(dev, sizeof(*sev), GFP_KERNEL); + if (!sev) + goto e_err; + + sev->cmd_buf = (void *)devm_get_free_pages(dev, GFP_KERNEL, 0); + if (!sev->cmd_buf) + goto e_sev; + + psp->sev_data = sev; + + sev->dev = dev; + sev->psp = psp; + + sev->io_regs = psp->io_regs; + + sev->vdata = (struct sev_vdata *)psp->vdata->sev; + if (!sev->vdata) { + ret = -ENODEV; + dev_err(dev, "sev: missing driver data\n"); + goto e_buf; + } + + psp_set_sev_irq_handler(psp, sev_irq_handler, sev); + + ret = sev_misc_init(sev); + if (ret) + goto e_irq; + + dev_notice(dev, "sev enabled\n"); + + return 0; + +e_irq: + psp_clear_sev_irq_handler(psp); +e_buf: + devm_free_pages(dev, (unsigned long)sev->cmd_buf); +e_sev: + devm_kfree(dev, sev); +e_err: + psp->sev_data = NULL; + + dev_notice(dev, "sev initialization failed\n"); + + return ret; +} + +static void sev_firmware_shutdown(struct sev_device *sev) +{ + sev_platform_shutdown(NULL); + + if (sev_es_tmr) { + /* The TMR area was encrypted, flush it from the cache */ + wbinvd_on_all_cpus(); + + free_pages((unsigned long)sev_es_tmr, + get_order(SEV_ES_TMR_SIZE)); + sev_es_tmr = NULL; + } +} + +void sev_dev_destroy(struct psp_device *psp) +{ + struct sev_device *sev = psp->sev_data; + + if (!sev) + return; + + sev_firmware_shutdown(sev); + + if (sev->misc) + kref_put(&misc_dev->refcount, sev_exit); + + psp_clear_sev_irq_handler(psp); +} + +int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd, + void *data, int *error) +{ + if (!filep || filep->f_op != &sev_fops) + return -EBADF; + + return sev_do_cmd(cmd, data, error); +} +EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user); + +void sev_pci_init(void) +{ + struct sev_device *sev = psp_master->sev_data; + int error, rc; + + if (!sev) + return; + + psp_timeout = psp_probe_timeout; + + if (sev_get_api_version()) + goto err; + + if (sev_version_greater_or_equal(0, 15) && + sev_update_firmware(sev->dev) == 0) + sev_get_api_version(); + + /* Obtain the TMR memory area for SEV-ES use */ + sev_es_tmr = sev_fw_alloc(SEV_ES_TMR_SIZE); + if (sev_es_tmr) + /* Must flush the cache before giving it to the firmware */ + clflush_cache_range(sev_es_tmr, SEV_ES_TMR_SIZE); + else + dev_warn(sev->dev, + "SEV: TMR allocation failed, SEV-ES support unavailable\n"); + + /* Initialize the platform */ + rc = sev_platform_init(&error); + if (rc && (error == SEV_RET_SECURE_DATA_INVALID)) { + /* + * INIT command returned an integrity check failure + * status code, meaning that firmware load and + * validation of SEV related persistent data has + * failed and persistent state has been erased. + * Retrying INIT command here should succeed. + */ + dev_dbg(sev->dev, "SEV: retrying INIT command"); + rc = sev_platform_init(&error); + } + + if (rc) { + dev_err(sev->dev, "SEV: failed to INIT error %#x\n", error); + return; + } + + dev_info(sev->dev, "SEV API:%d.%d build:%d\n", sev->api_major, + sev->api_minor, sev->build); + + return; + +err: + psp_master->sev_data = NULL; +} + +void sev_pci_exit(void) +{ + struct sev_device *sev = psp_master->sev_data; + + if (!sev) + return; + + sev_firmware_shutdown(sev); +} diff --git a/drivers/crypto/ccp/sev-dev.h b/drivers/crypto/ccp/sev-dev.h new file mode 100644 index 000000000..0fd21433f --- /dev/null +++ b/drivers/crypto/ccp/sev-dev.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Platform Security Processor (PSP) interface driver + * + * Copyright (C) 2017-2019 Advanced Micro Devices, Inc. + * + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#ifndef __SEV_DEV_H__ +#define __SEV_DEV_H__ + +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/dmapool.h> +#include <linux/hw_random.h> +#include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/irqreturn.h> +#include <linux/dmaengine.h> +#include <linux/psp-sev.h> +#include <linux/miscdevice.h> +#include <linux/capability.h> + +#define SEV_CMD_COMPLETE BIT(1) +#define SEV_CMDRESP_CMD_SHIFT 16 +#define SEV_CMDRESP_IOC BIT(0) + +struct sev_misc_dev { + struct kref refcount; + struct miscdevice misc; +}; + +struct sev_device { + struct device *dev; + struct psp_device *psp; + + void __iomem *io_regs; + + struct sev_vdata *vdata; + + int state; + unsigned int int_rcvd; + wait_queue_head_t int_queue; + struct sev_misc_dev *misc; + struct sev_data_init init_cmd_buf; + + u8 api_major; + u8 api_minor; + u8 build; + + void *cmd_buf; +}; + +int sev_dev_init(struct psp_device *psp); +void sev_dev_destroy(struct psp_device *psp); + +void sev_pci_init(void); +void sev_pci_exit(void); + +#endif /* __SEV_DEV_H */ diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c new file mode 100644 index 000000000..6284a15e5 --- /dev/null +++ b/drivers/crypto/ccp/sp-dev.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure Processor driver + * + * Copyright (C) 2017-2018 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/spinlock_types.h> +#include <linux/types.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" +#include "sp-dev.h" + +MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>"); +MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>"); +MODULE_LICENSE("GPL"); +MODULE_VERSION("1.1.0"); +MODULE_DESCRIPTION("AMD Secure Processor driver"); + +/* List of SPs, SP count, read-write access lock, and access functions + * + * Lock structure: get sp_unit_lock for reading whenever we need to + * examine the SP list. + */ +static DEFINE_RWLOCK(sp_unit_lock); +static LIST_HEAD(sp_units); + +/* Ever-increasing value to produce unique unit numbers */ +static atomic_t sp_ordinal; + +static void sp_add_device(struct sp_device *sp) +{ + unsigned long flags; + + write_lock_irqsave(&sp_unit_lock, flags); + + list_add_tail(&sp->entry, &sp_units); + + write_unlock_irqrestore(&sp_unit_lock, flags); +} + +static void sp_del_device(struct sp_device *sp) +{ + unsigned long flags; + + write_lock_irqsave(&sp_unit_lock, flags); + + list_del(&sp->entry); + + write_unlock_irqrestore(&sp_unit_lock, flags); +} + +static irqreturn_t sp_irq_handler(int irq, void *data) +{ + struct sp_device *sp = data; + + if (sp->ccp_irq_handler) + sp->ccp_irq_handler(irq, sp->ccp_irq_data); + + if (sp->psp_irq_handler) + sp->psp_irq_handler(irq, sp->psp_irq_data); + + return IRQ_HANDLED; +} + +int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data) +{ + int ret; + + if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) { + /* Need a common routine to manage all interrupts */ + sp->ccp_irq_data = data; + sp->ccp_irq_handler = handler; + + if (!sp->irq_registered) { + ret = request_irq(sp->ccp_irq, sp_irq_handler, 0, + sp->name, sp); + if (ret) + return ret; + + sp->irq_registered = true; + } + } else { + /* Each sub-device can manage it's own interrupt */ + ret = request_irq(sp->ccp_irq, handler, 0, name, data); + if (ret) + return ret; + } + + return 0; +} + +int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data) +{ + int ret; + + if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) { + /* Need a common routine to manage all interrupts */ + sp->psp_irq_data = data; + sp->psp_irq_handler = handler; + + if (!sp->irq_registered) { + ret = request_irq(sp->psp_irq, sp_irq_handler, 0, + sp->name, sp); + if (ret) + return ret; + + sp->irq_registered = true; + } + } else { + /* Each sub-device can manage it's own interrupt */ + ret = request_irq(sp->psp_irq, handler, 0, name, data); + if (ret) + return ret; + } + + return 0; +} + +void sp_free_ccp_irq(struct sp_device *sp, void *data) +{ + if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata) { + /* Using common routine to manage all interrupts */ + if (!sp->psp_irq_handler) { + /* Nothing else using it, so free it */ + free_irq(sp->ccp_irq, sp); + + sp->irq_registered = false; + } + + sp->ccp_irq_handler = NULL; + sp->ccp_irq_data = NULL; + } else { + /* Each sub-device can manage it's own interrupt */ + free_irq(sp->ccp_irq, data); + } +} + +void sp_free_psp_irq(struct sp_device *sp, void *data) +{ + if ((sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata) { + /* Using common routine to manage all interrupts */ + if (!sp->ccp_irq_handler) { + /* Nothing else using it, so free it */ + free_irq(sp->psp_irq, sp); + + sp->irq_registered = false; + } + + sp->psp_irq_handler = NULL; + sp->psp_irq_data = NULL; + } else { + /* Each sub-device can manage it's own interrupt */ + free_irq(sp->psp_irq, data); + } +} + +/** + * sp_alloc_struct - allocate and initialize the sp_device struct + * + * @dev: device struct of the SP + */ +struct sp_device *sp_alloc_struct(struct device *dev) +{ + struct sp_device *sp; + + sp = devm_kzalloc(dev, sizeof(*sp), GFP_KERNEL); + if (!sp) + return NULL; + + sp->dev = dev; + sp->ord = atomic_inc_return(&sp_ordinal); + snprintf(sp->name, SP_MAX_NAME_LEN, "sp-%u", sp->ord); + + return sp; +} + +int sp_init(struct sp_device *sp) +{ + sp_add_device(sp); + + if (sp->dev_vdata->ccp_vdata) + ccp_dev_init(sp); + + if (sp->dev_vdata->psp_vdata) + psp_dev_init(sp); + return 0; +} + +void sp_destroy(struct sp_device *sp) +{ + if (sp->dev_vdata->ccp_vdata) + ccp_dev_destroy(sp); + + if (sp->dev_vdata->psp_vdata) + psp_dev_destroy(sp); + + sp_del_device(sp); +} + +int sp_suspend(struct sp_device *sp) +{ + int ret; + + if (sp->dev_vdata->ccp_vdata) { + ret = ccp_dev_suspend(sp); + if (ret) + return ret; + } + + return 0; +} + +int sp_resume(struct sp_device *sp) +{ + int ret; + + if (sp->dev_vdata->ccp_vdata) { + ret = ccp_dev_resume(sp); + if (ret) + return ret; + } + + return 0; +} + +struct sp_device *sp_get_psp_master_device(void) +{ + struct sp_device *i, *ret = NULL; + unsigned long flags; + + write_lock_irqsave(&sp_unit_lock, flags); + if (list_empty(&sp_units)) + goto unlock; + + list_for_each_entry(i, &sp_units, entry) { + if (i->psp_data && i->get_psp_master_device) { + ret = i->get_psp_master_device(); + break; + } + } + +unlock: + write_unlock_irqrestore(&sp_unit_lock, flags); + return ret; +} + +static int __init sp_mod_init(void) +{ +#ifdef CONFIG_X86 + int ret; + + ret = sp_pci_init(); + if (ret) + return ret; + +#ifdef CONFIG_CRYPTO_DEV_SP_PSP + psp_pci_init(); +#endif + + return 0; +#endif + +#ifdef CONFIG_ARM64 + int ret; + + ret = sp_platform_init(); + if (ret) + return ret; + + return 0; +#endif + + return -ENODEV; +} + +static void __exit sp_mod_exit(void) +{ +#ifdef CONFIG_X86 + +#ifdef CONFIG_CRYPTO_DEV_SP_PSP + psp_pci_exit(); +#endif + + sp_pci_exit(); +#endif + +#ifdef CONFIG_ARM64 + sp_platform_exit(); +#endif +} + +module_init(sp_mod_init); +module_exit(sp_mod_exit); diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h new file mode 100644 index 000000000..0218d0670 --- /dev/null +++ b/drivers/crypto/ccp/sp-dev.h @@ -0,0 +1,174 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * AMD Secure Processor driver + * + * Copyright (C) 2017-2019 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + * Author: Brijesh Singh <brijesh.singh@amd.com> + */ + +#ifndef __SP_DEV_H__ +#define __SP_DEV_H__ + +#include <linux/device.h> +#include <linux/spinlock.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/wait.h> +#include <linux/dmapool.h> +#include <linux/hw_random.h> +#include <linux/bitops.h> +#include <linux/interrupt.h> +#include <linux/irqreturn.h> + +#define SP_MAX_NAME_LEN 32 + +#define CACHE_NONE 0x00 +#define CACHE_WB_NO_ALLOC 0xb7 + +/* Structure to hold CCP device data */ +struct ccp_device; +struct ccp_vdata { + const unsigned int version; + const unsigned int dma_chan_attr; + void (*setup)(struct ccp_device *); + const struct ccp_actions *perform; + const unsigned int offset; + const unsigned int rsamax; +}; + +struct sev_vdata { + const unsigned int cmdresp_reg; + const unsigned int cmdbuff_addr_lo_reg; + const unsigned int cmdbuff_addr_hi_reg; +}; + +struct tee_vdata { + const unsigned int cmdresp_reg; + const unsigned int cmdbuff_addr_lo_reg; + const unsigned int cmdbuff_addr_hi_reg; + const unsigned int ring_wptr_reg; + const unsigned int ring_rptr_reg; +}; + +struct psp_vdata { + const struct sev_vdata *sev; + const struct tee_vdata *tee; + const unsigned int feature_reg; + const unsigned int inten_reg; + const unsigned int intsts_reg; +}; + +/* Structure to hold SP device data */ +struct sp_dev_vdata { + const unsigned int bar; + + const struct ccp_vdata *ccp_vdata; + const struct psp_vdata *psp_vdata; +}; + +struct sp_device { + struct list_head entry; + + struct device *dev; + + struct sp_dev_vdata *dev_vdata; + unsigned int ord; + char name[SP_MAX_NAME_LEN]; + + /* Bus specific device information */ + void *dev_specific; + + /* I/O area used for device communication. */ + void __iomem *io_map; + + /* DMA caching attribute support */ + unsigned int axcache; + + /* get and set master device */ + struct sp_device*(*get_psp_master_device)(void); + void (*set_psp_master_device)(struct sp_device *); + void (*clear_psp_master_device)(struct sp_device *); + + bool irq_registered; + bool use_tasklet; + + unsigned int ccp_irq; + irq_handler_t ccp_irq_handler; + void *ccp_irq_data; + + unsigned int psp_irq; + irq_handler_t psp_irq_handler; + void *psp_irq_data; + + void *ccp_data; + void *psp_data; +}; + +int sp_pci_init(void); +void sp_pci_exit(void); + +int sp_platform_init(void); +void sp_platform_exit(void); + +struct sp_device *sp_alloc_struct(struct device *dev); + +int sp_init(struct sp_device *sp); +void sp_destroy(struct sp_device *sp); +struct sp_device *sp_get_master(void); + +int sp_suspend(struct sp_device *sp); +int sp_resume(struct sp_device *sp); +int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data); +void sp_free_ccp_irq(struct sp_device *sp, void *data); +int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler, + const char *name, void *data); +void sp_free_psp_irq(struct sp_device *sp, void *data); +struct sp_device *sp_get_psp_master_device(void); + +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + +int ccp_dev_init(struct sp_device *sp); +void ccp_dev_destroy(struct sp_device *sp); + +int ccp_dev_suspend(struct sp_device *sp); +int ccp_dev_resume(struct sp_device *sp); + +#else /* !CONFIG_CRYPTO_DEV_SP_CCP */ + +static inline int ccp_dev_init(struct sp_device *sp) +{ + return 0; +} +static inline void ccp_dev_destroy(struct sp_device *sp) { } + +static inline int ccp_dev_suspend(struct sp_device *sp) +{ + return 0; +} +static inline int ccp_dev_resume(struct sp_device *sp) +{ + return 0; +} +#endif /* CONFIG_CRYPTO_DEV_SP_CCP */ + +#ifdef CONFIG_CRYPTO_DEV_SP_PSP + +int psp_dev_init(struct sp_device *sp); +void psp_pci_init(void); +void psp_dev_destroy(struct sp_device *sp); +void psp_pci_exit(void); + +#else /* !CONFIG_CRYPTO_DEV_SP_PSP */ + +static inline int psp_dev_init(struct sp_device *sp) { return 0; } +static inline void psp_pci_init(void) { } +static inline void psp_dev_destroy(struct sp_device *sp) { } +static inline void psp_pci_exit(void) { } + +#endif /* CONFIG_CRYPTO_DEV_SP_PSP */ + +#endif diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c new file mode 100644 index 000000000..c319e7e39 --- /dev/null +++ b/drivers/crypto/ccp/sp-pci.c @@ -0,0 +1,396 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure Processor device driver + * + * Copyright (C) 2013,2019 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + * Author: Gary R Hook <gary.hook@amd.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/dma-mapping.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/ccp.h> + +#include "ccp-dev.h" +#include "psp-dev.h" + +#define MSIX_VECTORS 2 + +struct sp_pci { + int msix_count; + struct msix_entry msix_entry[MSIX_VECTORS]; +}; +static struct sp_device *sp_dev_master; + +static int sp_get_msix_irqs(struct sp_device *sp) +{ + struct sp_pci *sp_pci = sp->dev_specific; + struct device *dev = sp->dev; + struct pci_dev *pdev = to_pci_dev(dev); + int v, ret; + + for (v = 0; v < ARRAY_SIZE(sp_pci->msix_entry); v++) + sp_pci->msix_entry[v].entry = v; + + ret = pci_enable_msix_range(pdev, sp_pci->msix_entry, 1, v); + if (ret < 0) + return ret; + + sp_pci->msix_count = ret; + sp->use_tasklet = true; + + sp->psp_irq = sp_pci->msix_entry[0].vector; + sp->ccp_irq = (sp_pci->msix_count > 1) ? sp_pci->msix_entry[1].vector + : sp_pci->msix_entry[0].vector; + return 0; +} + +static int sp_get_msi_irq(struct sp_device *sp) +{ + struct device *dev = sp->dev; + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_enable_msi(pdev); + if (ret) + return ret; + + sp->ccp_irq = pdev->irq; + sp->psp_irq = pdev->irq; + + return 0; +} + +static int sp_get_irqs(struct sp_device *sp) +{ + struct device *dev = sp->dev; + int ret; + + ret = sp_get_msix_irqs(sp); + if (!ret) + return 0; + + /* Couldn't get MSI-X vectors, try MSI */ + dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", ret); + ret = sp_get_msi_irq(sp); + if (!ret) + return 0; + + /* Couldn't get MSI interrupt */ + dev_notice(dev, "could not enable MSI (%d)\n", ret); + + return ret; +} + +static void sp_free_irqs(struct sp_device *sp) +{ + struct sp_pci *sp_pci = sp->dev_specific; + struct device *dev = sp->dev; + struct pci_dev *pdev = to_pci_dev(dev); + + if (sp_pci->msix_count) + pci_disable_msix(pdev); + else if (sp->psp_irq) + pci_disable_msi(pdev); + + sp->ccp_irq = 0; + sp->psp_irq = 0; +} + +static bool sp_pci_is_master(struct sp_device *sp) +{ + struct device *dev_cur, *dev_new; + struct pci_dev *pdev_cur, *pdev_new; + + dev_new = sp->dev; + dev_cur = sp_dev_master->dev; + + pdev_new = to_pci_dev(dev_new); + pdev_cur = to_pci_dev(dev_cur); + + if (pdev_new->bus->number < pdev_cur->bus->number) + return true; + + if (PCI_SLOT(pdev_new->devfn) < PCI_SLOT(pdev_cur->devfn)) + return true; + + if (PCI_FUNC(pdev_new->devfn) < PCI_FUNC(pdev_cur->devfn)) + return true; + + return false; +} + +static void psp_set_master(struct sp_device *sp) +{ + if (!sp_dev_master) { + sp_dev_master = sp; + return; + } + + if (sp_pci_is_master(sp)) + sp_dev_master = sp; +} + +static struct sp_device *psp_get_master(void) +{ + return sp_dev_master; +} + +static void psp_clear_master(struct sp_device *sp) +{ + if (sp == sp_dev_master) { + sp_dev_master = NULL; + dev_dbg(sp->dev, "Cleared sp_dev_master\n"); + } +} + +static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct sp_device *sp; + struct sp_pci *sp_pci; + struct device *dev = &pdev->dev; + void __iomem * const *iomap_table; + int bar_mask; + int ret; + + ret = -ENOMEM; + sp = sp_alloc_struct(dev); + if (!sp) + goto e_err; + + sp_pci = devm_kzalloc(dev, sizeof(*sp_pci), GFP_KERNEL); + if (!sp_pci) + goto e_err; + + sp->dev_specific = sp_pci; + sp->dev_vdata = (struct sp_dev_vdata *)id->driver_data; + if (!sp->dev_vdata) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } + + ret = pcim_enable_device(pdev); + if (ret) { + dev_err(dev, "pcim_enable_device failed (%d)\n", ret); + goto e_err; + } + + bar_mask = pci_select_bars(pdev, IORESOURCE_MEM); + ret = pcim_iomap_regions(pdev, bar_mask, "ccp"); + if (ret) { + dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret); + goto e_err; + } + + iomap_table = pcim_iomap_table(pdev); + if (!iomap_table) { + dev_err(dev, "pcim_iomap_table failed\n"); + ret = -ENOMEM; + goto e_err; + } + + sp->io_map = iomap_table[sp->dev_vdata->bar]; + if (!sp->io_map) { + dev_err(dev, "ioremap failed\n"); + ret = -ENOMEM; + goto e_err; + } + + ret = sp_get_irqs(sp); + if (ret) + goto e_err; + + pci_set_master(pdev); + sp->set_psp_master_device = psp_set_master; + sp->get_psp_master_device = psp_get_master; + sp->clear_psp_master_device = psp_clear_master; + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", + ret); + goto free_irqs; + } + } + + dev_set_drvdata(dev, sp); + + ret = sp_init(sp); + if (ret) + goto free_irqs; + + return 0; + +free_irqs: + sp_free_irqs(sp); +e_err: + dev_notice(dev, "initialization failed\n"); + return ret; +} + +static void sp_pci_shutdown(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + if (!sp) + return; + + sp_destroy(sp); +} + +static void sp_pci_remove(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + if (!sp) + return; + + sp_destroy(sp); + + sp_free_irqs(sp); +} + +static int __maybe_unused sp_pci_suspend(struct device *dev) +{ + struct sp_device *sp = dev_get_drvdata(dev); + + return sp_suspend(sp); +} + +static int __maybe_unused sp_pci_resume(struct device *dev) +{ + struct sp_device *sp = dev_get_drvdata(dev); + + return sp_resume(sp); +} + +#ifdef CONFIG_CRYPTO_DEV_SP_PSP +static const struct sev_vdata sevv1 = { + .cmdresp_reg = 0x10580, + .cmdbuff_addr_lo_reg = 0x105e0, + .cmdbuff_addr_hi_reg = 0x105e4, +}; + +static const struct sev_vdata sevv2 = { + .cmdresp_reg = 0x10980, + .cmdbuff_addr_lo_reg = 0x109e0, + .cmdbuff_addr_hi_reg = 0x109e4, +}; + +static const struct tee_vdata teev1 = { + .cmdresp_reg = 0x10544, + .cmdbuff_addr_lo_reg = 0x10548, + .cmdbuff_addr_hi_reg = 0x1054c, + .ring_wptr_reg = 0x10550, + .ring_rptr_reg = 0x10554, +}; + +static const struct psp_vdata pspv1 = { + .sev = &sevv1, + .feature_reg = 0x105fc, + .inten_reg = 0x10610, + .intsts_reg = 0x10614, +}; + +static const struct psp_vdata pspv2 = { + .sev = &sevv2, + .feature_reg = 0x109fc, + .inten_reg = 0x10690, + .intsts_reg = 0x10694, +}; + +static const struct psp_vdata pspv3 = { + .tee = &teev1, + .feature_reg = 0x109fc, + .inten_reg = 0x10690, + .intsts_reg = 0x10694, +}; +#endif + +static const struct sp_dev_vdata dev_vdata[] = { + { /* 0 */ + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv3, +#endif + }, + { /* 1 */ + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv5a, +#endif +#ifdef CONFIG_CRYPTO_DEV_SP_PSP + .psp_vdata = &pspv1, +#endif + }, + { /* 2 */ + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv5b, +#endif + }, + { /* 3 */ + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv5a, +#endif +#ifdef CONFIG_CRYPTO_DEV_SP_PSP + .psp_vdata = &pspv2, +#endif + }, + { /* 4 */ + .bar = 2, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv5a, +#endif +#ifdef CONFIG_CRYPTO_DEV_SP_PSP + .psp_vdata = &pspv3, +#endif + }, +}; +static const struct pci_device_id sp_pci_table[] = { + { PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] }, + { PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&dev_vdata[1] }, + { PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&dev_vdata[2] }, + { PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] }, + { PCI_VDEVICE(AMD, 0x15DF), (kernel_ulong_t)&dev_vdata[4] }, + /* Last entry must be zero */ + { 0, } +}; +MODULE_DEVICE_TABLE(pci, sp_pci_table); + +static SIMPLE_DEV_PM_OPS(sp_pci_pm_ops, sp_pci_suspend, sp_pci_resume); + +static struct pci_driver sp_pci_driver = { + .name = "ccp", + .id_table = sp_pci_table, + .probe = sp_pci_probe, + .remove = sp_pci_remove, + .shutdown = sp_pci_shutdown, + .driver.pm = &sp_pci_pm_ops, +}; + +int sp_pci_init(void) +{ + return pci_register_driver(&sp_pci_driver); +} + +void sp_pci_exit(void) +{ + pci_unregister_driver(&sp_pci_driver); +} diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c new file mode 100644 index 000000000..9dba52fbe --- /dev/null +++ b/drivers/crypto/ccp/sp-platform.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD Secure Processor device driver + * + * Copyright (C) 2014,2018 Advanced Micro Devices, Inc. + * + * Author: Tom Lendacky <thomas.lendacky@amd.com> + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/platform_device.h> +#include <linux/ioport.h> +#include <linux/dma-mapping.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/delay.h> +#include <linux/ccp.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/acpi.h> + +#include "ccp-dev.h" + +struct sp_platform { + int coherent; + unsigned int irq_count; +}; + +static const struct sp_dev_vdata dev_vdata[] = { + { + .bar = 0, +#ifdef CONFIG_CRYPTO_DEV_SP_CCP + .ccp_vdata = &ccpv3_platform, +#endif + }, +}; + +#ifdef CONFIG_ACPI +static const struct acpi_device_id sp_acpi_match[] = { + { "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] }, + { }, +}; +MODULE_DEVICE_TABLE(acpi, sp_acpi_match); +#endif + +#ifdef CONFIG_OF +static const struct of_device_id sp_of_match[] = { + { .compatible = "amd,ccp-seattle-v1a", + .data = (const void *)&dev_vdata[0] }, + { }, +}; +MODULE_DEVICE_TABLE(of, sp_of_match); +#endif + +static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev) +{ +#ifdef CONFIG_OF + const struct of_device_id *match; + + match = of_match_node(sp_of_match, pdev->dev.of_node); + if (match && match->data) + return (struct sp_dev_vdata *)match->data; +#endif + return NULL; +} + +static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev) +{ +#ifdef CONFIG_ACPI + const struct acpi_device_id *match; + + match = acpi_match_device(sp_acpi_match, &pdev->dev); + if (match && match->driver_data) + return (struct sp_dev_vdata *)match->driver_data; +#endif + return NULL; +} + +static int sp_get_irqs(struct sp_device *sp) +{ + struct sp_platform *sp_platform = sp->dev_specific; + struct device *dev = sp->dev; + struct platform_device *pdev = to_platform_device(dev); + unsigned int i, count; + int ret; + + for (i = 0, count = 0; i < pdev->num_resources; i++) { + struct resource *res = &pdev->resource[i]; + + if (resource_type(res) == IORESOURCE_IRQ) + count++; + } + + sp_platform->irq_count = count; + + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_notice(dev, "unable to get IRQ (%d)\n", ret); + return ret; + } + + sp->psp_irq = ret; + if (count == 1) { + sp->ccp_irq = ret; + } else { + ret = platform_get_irq(pdev, 1); + if (ret < 0) { + dev_notice(dev, "unable to get IRQ (%d)\n", ret); + return ret; + } + + sp->ccp_irq = ret; + } + + return 0; +} + +static int sp_platform_probe(struct platform_device *pdev) +{ + struct sp_device *sp; + struct sp_platform *sp_platform; + struct device *dev = &pdev->dev; + enum dev_dma_attr attr; + int ret; + + ret = -ENOMEM; + sp = sp_alloc_struct(dev); + if (!sp) + goto e_err; + + sp_platform = devm_kzalloc(dev, sizeof(*sp_platform), GFP_KERNEL); + if (!sp_platform) + goto e_err; + + sp->dev_specific = sp_platform; + sp->dev_vdata = pdev->dev.of_node ? sp_get_of_version(pdev) + : sp_get_acpi_version(pdev); + if (!sp->dev_vdata) { + ret = -ENODEV; + dev_err(dev, "missing driver data\n"); + goto e_err; + } + + sp->io_map = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(sp->io_map)) { + ret = PTR_ERR(sp->io_map); + goto e_err; + } + + attr = device_get_dma_attr(dev); + if (attr == DEV_DMA_NOT_SUPPORTED) { + dev_err(dev, "DMA is not supported"); + goto e_err; + } + + sp_platform->coherent = (attr == DEV_DMA_COHERENT); + if (sp_platform->coherent) + sp->axcache = CACHE_WB_NO_ALLOC; + else + sp->axcache = CACHE_NONE; + + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48)); + if (ret) { + dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret); + goto e_err; + } + + ret = sp_get_irqs(sp); + if (ret) + goto e_err; + + dev_set_drvdata(dev, sp); + + ret = sp_init(sp); + if (ret) + goto e_err; + + dev_notice(dev, "enabled\n"); + + return 0; + +e_err: + dev_notice(dev, "initialization failed\n"); + return ret; +} + +static int sp_platform_remove(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + sp_destroy(sp); + + dev_notice(dev, "disabled\n"); + + return 0; +} + +#ifdef CONFIG_PM +static int sp_platform_suspend(struct platform_device *pdev, + pm_message_t state) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + return sp_suspend(sp); +} + +static int sp_platform_resume(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct sp_device *sp = dev_get_drvdata(dev); + + return sp_resume(sp); +} +#endif + +static struct platform_driver sp_platform_driver = { + .driver = { + .name = "ccp", +#ifdef CONFIG_ACPI + .acpi_match_table = sp_acpi_match, +#endif +#ifdef CONFIG_OF + .of_match_table = sp_of_match, +#endif + }, + .probe = sp_platform_probe, + .remove = sp_platform_remove, +#ifdef CONFIG_PM + .suspend = sp_platform_suspend, + .resume = sp_platform_resume, +#endif +}; + +int sp_platform_init(void) +{ + return platform_driver_register(&sp_platform_driver); +} + +void sp_platform_exit(void) +{ + platform_driver_unregister(&sp_platform_driver); +} diff --git a/drivers/crypto/ccp/tee-dev.c b/drivers/crypto/ccp/tee-dev.c new file mode 100644 index 000000000..bcb81fef4 --- /dev/null +++ b/drivers/crypto/ccp/tee-dev.c @@ -0,0 +1,396 @@ +// SPDX-License-Identifier: MIT +/* + * AMD Trusted Execution Environment (TEE) interface + * + * Author: Rijo Thomas <Rijo-john.Thomas@amd.com> + * Author: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com> + * + * Copyright 2019 Advanced Micro Devices, Inc. + */ + +#include <linux/types.h> +#include <linux/mutex.h> +#include <linux/delay.h> +#include <linux/slab.h> +#include <linux/gfp.h> +#include <linux/psp-sev.h> +#include <linux/psp-tee.h> + +#include "psp-dev.h" +#include "tee-dev.h" + +static bool psp_dead; + +static int tee_alloc_ring(struct psp_tee_device *tee, int ring_size) +{ + struct ring_buf_manager *rb_mgr = &tee->rb_mgr; + void *start_addr; + + if (!ring_size) + return -EINVAL; + + /* We need actual physical address instead of DMA address, since + * Trusted OS running on AMD Secure Processor will map this region + */ + start_addr = (void *)__get_free_pages(GFP_KERNEL, get_order(ring_size)); + if (!start_addr) + return -ENOMEM; + + memset(start_addr, 0x0, ring_size); + rb_mgr->ring_start = start_addr; + rb_mgr->ring_size = ring_size; + rb_mgr->ring_pa = __psp_pa(start_addr); + mutex_init(&rb_mgr->mutex); + + return 0; +} + +static void tee_free_ring(struct psp_tee_device *tee) +{ + struct ring_buf_manager *rb_mgr = &tee->rb_mgr; + + if (!rb_mgr->ring_start) + return; + + free_pages((unsigned long)rb_mgr->ring_start, + get_order(rb_mgr->ring_size)); + + rb_mgr->ring_start = NULL; + rb_mgr->ring_size = 0; + rb_mgr->ring_pa = 0; + mutex_destroy(&rb_mgr->mutex); +} + +static int tee_wait_cmd_poll(struct psp_tee_device *tee, unsigned int timeout, + unsigned int *reg) +{ + /* ~10ms sleep per loop => nloop = timeout * 100 */ + int nloop = timeout * 100; + + while (--nloop) { + *reg = ioread32(tee->io_regs + tee->vdata->cmdresp_reg); + if (*reg & PSP_CMDRESP_RESP) + return 0; + + usleep_range(10000, 10100); + } + + dev_err(tee->dev, "tee: command timed out, disabling PSP\n"); + psp_dead = true; + + return -ETIMEDOUT; +} + +static +struct tee_init_ring_cmd *tee_alloc_cmd_buffer(struct psp_tee_device *tee) +{ + struct tee_init_ring_cmd *cmd; + + cmd = kzalloc(sizeof(*cmd), GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->hi_addr = upper_32_bits(tee->rb_mgr.ring_pa); + cmd->low_addr = lower_32_bits(tee->rb_mgr.ring_pa); + cmd->size = tee->rb_mgr.ring_size; + + dev_dbg(tee->dev, "tee: ring address: high = 0x%x low = 0x%x size = %u\n", + cmd->hi_addr, cmd->low_addr, cmd->size); + + return cmd; +} + +static inline void tee_free_cmd_buffer(struct tee_init_ring_cmd *cmd) +{ + kfree(cmd); +} + +static int tee_init_ring(struct psp_tee_device *tee) +{ + int ring_size = MAX_RING_BUFFER_ENTRIES * sizeof(struct tee_ring_cmd); + struct tee_init_ring_cmd *cmd; + phys_addr_t cmd_buffer; + unsigned int reg; + int ret; + + BUILD_BUG_ON(sizeof(struct tee_ring_cmd) != 1024); + + ret = tee_alloc_ring(tee, ring_size); + if (ret) { + dev_err(tee->dev, "tee: ring allocation failed %d\n", ret); + return ret; + } + + tee->rb_mgr.wptr = 0; + + cmd = tee_alloc_cmd_buffer(tee); + if (!cmd) { + tee_free_ring(tee); + return -ENOMEM; + } + + cmd_buffer = __psp_pa((void *)cmd); + + /* Send command buffer details to Trusted OS by writing to + * CPU-PSP message registers + */ + + iowrite32(lower_32_bits(cmd_buffer), + tee->io_regs + tee->vdata->cmdbuff_addr_lo_reg); + iowrite32(upper_32_bits(cmd_buffer), + tee->io_regs + tee->vdata->cmdbuff_addr_hi_reg); + iowrite32(TEE_RING_INIT_CMD, + tee->io_regs + tee->vdata->cmdresp_reg); + + ret = tee_wait_cmd_poll(tee, TEE_DEFAULT_TIMEOUT, ®); + if (ret) { + dev_err(tee->dev, "tee: ring init command timed out\n"); + tee_free_ring(tee); + goto free_buf; + } + + if (reg & PSP_CMDRESP_ERR_MASK) { + dev_err(tee->dev, "tee: ring init command failed (%#010x)\n", + reg & PSP_CMDRESP_ERR_MASK); + tee_free_ring(tee); + ret = -EIO; + } + +free_buf: + tee_free_cmd_buffer(cmd); + + return ret; +} + +static void tee_destroy_ring(struct psp_tee_device *tee) +{ + unsigned int reg; + int ret; + + if (!tee->rb_mgr.ring_start) + return; + + if (psp_dead) + goto free_ring; + + iowrite32(TEE_RING_DESTROY_CMD, + tee->io_regs + tee->vdata->cmdresp_reg); + + ret = tee_wait_cmd_poll(tee, TEE_DEFAULT_TIMEOUT, ®); + if (ret) { + dev_err(tee->dev, "tee: ring destroy command timed out\n"); + } else if (reg & PSP_CMDRESP_ERR_MASK) { + dev_err(tee->dev, "tee: ring destroy command failed (%#010x)\n", + reg & PSP_CMDRESP_ERR_MASK); + } + +free_ring: + tee_free_ring(tee); +} + +int tee_dev_init(struct psp_device *psp) +{ + struct device *dev = psp->dev; + struct psp_tee_device *tee; + int ret; + + ret = -ENOMEM; + tee = devm_kzalloc(dev, sizeof(*tee), GFP_KERNEL); + if (!tee) + goto e_err; + + psp->tee_data = tee; + + tee->dev = dev; + tee->psp = psp; + + tee->io_regs = psp->io_regs; + + tee->vdata = (struct tee_vdata *)psp->vdata->tee; + if (!tee->vdata) { + ret = -ENODEV; + dev_err(dev, "tee: missing driver data\n"); + goto e_err; + } + + ret = tee_init_ring(tee); + if (ret) { + dev_err(dev, "tee: failed to init ring buffer\n"); + goto e_err; + } + + dev_notice(dev, "tee enabled\n"); + + return 0; + +e_err: + psp->tee_data = NULL; + + dev_notice(dev, "tee initialization failed\n"); + + return ret; +} + +void tee_dev_destroy(struct psp_device *psp) +{ + struct psp_tee_device *tee = psp->tee_data; + + if (!tee) + return; + + tee_destroy_ring(tee); +} + +static int tee_submit_cmd(struct psp_tee_device *tee, enum tee_cmd_id cmd_id, + void *buf, size_t len, struct tee_ring_cmd **resp) +{ + struct tee_ring_cmd *cmd; + int nloop = 1000, ret = 0; + u32 rptr; + + *resp = NULL; + + mutex_lock(&tee->rb_mgr.mutex); + + /* Loop until empty entry found in ring buffer */ + do { + /* Get pointer to ring buffer command entry */ + cmd = (struct tee_ring_cmd *) + (tee->rb_mgr.ring_start + tee->rb_mgr.wptr); + + rptr = ioread32(tee->io_regs + tee->vdata->ring_rptr_reg); + + /* Check if ring buffer is full or command entry is waiting + * for response from TEE + */ + if (!(tee->rb_mgr.wptr + sizeof(struct tee_ring_cmd) == rptr || + cmd->flag == CMD_WAITING_FOR_RESPONSE)) + break; + + dev_dbg(tee->dev, "tee: ring buffer full. rptr = %u wptr = %u\n", + rptr, tee->rb_mgr.wptr); + + /* Wait if ring buffer is full or TEE is processing data */ + mutex_unlock(&tee->rb_mgr.mutex); + schedule_timeout_interruptible(msecs_to_jiffies(10)); + mutex_lock(&tee->rb_mgr.mutex); + + } while (--nloop); + + if (!nloop && + (tee->rb_mgr.wptr + sizeof(struct tee_ring_cmd) == rptr || + cmd->flag == CMD_WAITING_FOR_RESPONSE)) { + dev_err(tee->dev, "tee: ring buffer full. rptr = %u wptr = %u response flag %u\n", + rptr, tee->rb_mgr.wptr, cmd->flag); + ret = -EBUSY; + goto unlock; + } + + /* Do not submit command if PSP got disabled while processing any + * command in another thread + */ + if (psp_dead) { + ret = -EBUSY; + goto unlock; + } + + /* Write command data into ring buffer */ + cmd->cmd_id = cmd_id; + cmd->cmd_state = TEE_CMD_STATE_INIT; + memset(&cmd->buf[0], 0, sizeof(cmd->buf)); + memcpy(&cmd->buf[0], buf, len); + + /* Indicate driver is waiting for response */ + cmd->flag = CMD_WAITING_FOR_RESPONSE; + + /* Update local copy of write pointer */ + tee->rb_mgr.wptr += sizeof(struct tee_ring_cmd); + if (tee->rb_mgr.wptr >= tee->rb_mgr.ring_size) + tee->rb_mgr.wptr = 0; + + /* Trigger interrupt to Trusted OS */ + iowrite32(tee->rb_mgr.wptr, tee->io_regs + tee->vdata->ring_wptr_reg); + + /* The response is provided by Trusted OS in same + * location as submitted data entry within ring buffer. + */ + *resp = cmd; + +unlock: + mutex_unlock(&tee->rb_mgr.mutex); + + return ret; +} + +static int tee_wait_cmd_completion(struct psp_tee_device *tee, + struct tee_ring_cmd *resp, + unsigned int timeout) +{ + /* ~5ms sleep per loop => nloop = timeout * 200 */ + int nloop = timeout * 200; + + while (--nloop) { + if (resp->cmd_state == TEE_CMD_STATE_COMPLETED) + return 0; + + usleep_range(5000, 5100); + } + + dev_err(tee->dev, "tee: command 0x%x timed out, disabling PSP\n", + resp->cmd_id); + + psp_dead = true; + + return -ETIMEDOUT; +} + +int psp_tee_process_cmd(enum tee_cmd_id cmd_id, void *buf, size_t len, + u32 *status) +{ + struct psp_device *psp = psp_get_master_device(); + struct psp_tee_device *tee; + struct tee_ring_cmd *resp; + int ret; + + if (!buf || !status || !len || len > sizeof(resp->buf)) + return -EINVAL; + + *status = 0; + + if (!psp || !psp->tee_data) + return -ENODEV; + + if (psp_dead) + return -EBUSY; + + tee = psp->tee_data; + + ret = tee_submit_cmd(tee, cmd_id, buf, len, &resp); + if (ret) + return ret; + + ret = tee_wait_cmd_completion(tee, resp, TEE_DEFAULT_TIMEOUT); + if (ret) { + resp->flag = CMD_RESPONSE_TIMEDOUT; + return ret; + } + + memcpy(buf, &resp->buf[0], len); + *status = resp->status; + + resp->flag = CMD_RESPONSE_COPIED; + + return 0; +} +EXPORT_SYMBOL(psp_tee_process_cmd); + +int psp_check_tee_status(void) +{ + struct psp_device *psp = psp_get_master_device(); + + if (!psp || !psp->tee_data) + return -ENODEV; + + return 0; +} +EXPORT_SYMBOL(psp_check_tee_status); diff --git a/drivers/crypto/ccp/tee-dev.h b/drivers/crypto/ccp/tee-dev.h new file mode 100644 index 000000000..49d26158b --- /dev/null +++ b/drivers/crypto/ccp/tee-dev.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) 2019,2021 Advanced Micro Devices, Inc. + * + * Author: Rijo Thomas <Rijo-john.Thomas@amd.com> + * Author: Devaraj Rangasamy <Devaraj.Rangasamy@amd.com> + * + */ + +/* This file describes the TEE communication interface between host and AMD + * Secure Processor + */ + +#ifndef __TEE_DEV_H__ +#define __TEE_DEV_H__ + +#include <linux/device.h> +#include <linux/mutex.h> + +#define TEE_DEFAULT_TIMEOUT 10 +#define MAX_BUFFER_SIZE 988 + +/** + * enum tee_ring_cmd_id - TEE interface commands for ring buffer configuration + * @TEE_RING_INIT_CMD: Initialize ring buffer + * @TEE_RING_DESTROY_CMD: Destroy ring buffer + * @TEE_RING_MAX_CMD: Maximum command id + */ +enum tee_ring_cmd_id { + TEE_RING_INIT_CMD = 0x00010000, + TEE_RING_DESTROY_CMD = 0x00020000, + TEE_RING_MAX_CMD = 0x000F0000, +}; + +/** + * struct tee_init_ring_cmd - Command to init TEE ring buffer + * @low_addr: bits [31:0] of the physical address of ring buffer + * @hi_addr: bits [63:32] of the physical address of ring buffer + * @size: size of ring buffer in bytes + */ +struct tee_init_ring_cmd { + u32 low_addr; + u32 hi_addr; + u32 size; +}; + +#define MAX_RING_BUFFER_ENTRIES 32 + +/** + * struct ring_buf_manager - Helper structure to manage ring buffer. + * @ring_start: starting address of ring buffer + * @ring_size: size of ring buffer in bytes + * @ring_pa: physical address of ring buffer + * @wptr: index to the last written entry in ring buffer + */ +struct ring_buf_manager { + struct mutex mutex; /* synchronizes access to ring buffer */ + void *ring_start; + u32 ring_size; + phys_addr_t ring_pa; + u32 wptr; +}; + +struct psp_tee_device { + struct device *dev; + struct psp_device *psp; + void __iomem *io_regs; + struct tee_vdata *vdata; + struct ring_buf_manager rb_mgr; +}; + +/** + * enum tee_cmd_state - TEE command states for the ring buffer interface + * @TEE_CMD_STATE_INIT: initial state of command when sent from host + * @TEE_CMD_STATE_PROCESS: command being processed by TEE environment + * @TEE_CMD_STATE_COMPLETED: command processing completed + */ +enum tee_cmd_state { + TEE_CMD_STATE_INIT, + TEE_CMD_STATE_PROCESS, + TEE_CMD_STATE_COMPLETED, +}; + +/** + * enum cmd_resp_state - TEE command's response status maintained by driver + * @CMD_RESPONSE_INVALID: initial state when no command is written to ring + * @CMD_WAITING_FOR_RESPONSE: driver waiting for response from TEE + * @CMD_RESPONSE_TIMEDOUT: failed to get response from TEE + * @CMD_RESPONSE_COPIED: driver has copied response from TEE + */ +enum cmd_resp_state { + CMD_RESPONSE_INVALID, + CMD_WAITING_FOR_RESPONSE, + CMD_RESPONSE_TIMEDOUT, + CMD_RESPONSE_COPIED, +}; + +/** + * struct tee_ring_cmd - Structure of the command buffer in TEE ring + * @cmd_id: refers to &enum tee_cmd_id. Command id for the ring buffer + * interface + * @cmd_state: refers to &enum tee_cmd_state + * @status: status of TEE command execution + * @res0: reserved region + * @pdata: private data (currently unused) + * @res1: reserved region + * @buf: TEE command specific buffer + * @flag: refers to &enum cmd_resp_state + */ +struct tee_ring_cmd { + u32 cmd_id; + u32 cmd_state; + u32 status; + u32 res0[1]; + u64 pdata; + u32 res1[2]; + u8 buf[MAX_BUFFER_SIZE]; + u32 flag; + + /* Total size: 1024 bytes */ +} __packed; + +int tee_dev_init(struct psp_device *psp); +void tee_dev_destroy(struct psp_device *psp); + +#endif /* __TEE_DEV_H__ */ |