24 files changed, 12023 insertions, 0 deletions
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
new file mode 100644
index 000000000..7f5fc7055
--- /dev/null
+++ b/drivers/crypto/ccp/Kconfig
@@ -0,0 +1,45 @@
+config CRYPTO_DEV_CCP_DD
+	tristate "Secure Processor device driver"
+	depends on CPU_SUP_AMD || ARM64
+	default m
+	help
+	  Provides the AMD Secure Processor device driver.
+	  If you choose 'M' here, this module will be called ccp.
+
+config CRYPTO_DEV_SP_CCP
+	bool "Cryptographic Coprocessor device"
+	default y
+	depends on CRYPTO_DEV_CCP_DD && DMADEVICES
+	select HW_RANDOM
+	select DMA_ENGINE
+	select CRYPTO_SHA1
+	select CRYPTO_SHA256
+	help
+	  Provides support for the AMD Cryptographic Coprocessor (CCP) device,
+	  which can be used to offload cryptographic operations such as SHA,
+	  AES and more.
+
+config CRYPTO_DEV_CCP_CRYPTO
+	tristate "Encryption and hashing offload support"
+	default m
+	depends on CRYPTO_DEV_CCP_DD
+	depends on CRYPTO_DEV_SP_CCP
+	select CRYPTO_HASH
+	select CRYPTO_BLKCIPHER
+	select CRYPTO_AUTHENC
+	select CRYPTO_RSA
+	help
+	  Support for using the cryptographic API with the AMD Cryptographic
+	  Coprocessor. This module supports offload of SHA and AES algorithms.
+	  If you choose 'M' here, this module will be called ccp_crypto.
+
+config CRYPTO_DEV_SP_PSP
+	bool "Platform Security Processor (PSP) device"
+	default y
+	depends on CRYPTO_DEV_CCP_DD && X86_64
+	help
+	 Provide support for the AMD Platform Security Processor (PSP).
+	 The PSP is a dedicated processor that provides support for key
+	 management commands in Secure Encrypted Virtualization (SEV) mode,
+	 along with software-based Trusted Execution Environment (TEE) to
+	 enable third-party trusted applications.
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
new file mode 100644
index 000000000..51d1c0cf6
--- /dev/null
+++ b/drivers/crypto/ccp/Makefile
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CRYPTO_DEV_CCP_DD) += ccp.o
+ccp-objs  := sp-dev.o sp-platform.o
+ccp-$(CONFIG_CRYPTO_DEV_SP_CCP) += ccp-dev.o \
+	    ccp-ops.o \
+	    ccp-dev-v3.o \
+	    ccp-dev-v5.o \
+	    ccp-dmaengine.o \
+	    ccp-debugfs.o
+ccp-$(CONFIG_PCI) += sp-pci.o
+ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o
+
+obj-$(CONFIG_CRYPTO_DEV_CCP_CRYPTO) += ccp-crypto.o
+ccp-crypto-objs := ccp-crypto-main.o \
+		   ccp-crypto-aes.o \
+		   ccp-crypto-aes-cmac.o \
+		   ccp-crypto-aes-xts.o \
+		   ccp-crypto-aes-galois.o \
+		   ccp-crypto-des3.o \
+		   ccp-crypto-rsa.o \
+		   ccp-crypto-sha.o
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
new file mode 100644
index 000000000..3c6fe57f9
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -0,0 +1,423 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) AES CMAC crypto API support
+ *
+ * Copyright (C) 2013 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/hash.h>
+#include <crypto/internal/hash.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+/*
+ * Completion callback for a CMAC request.
+ *
+ * On success the unhashed tail of the source (rctx->hash_rem bytes) is saved
+ * back into rctx->buf for the next call and, for a final operation with a
+ * result pointer, the MAC is copied out of rctx->iv. The data scatterlist
+ * table is freed whether or not the operation succeeded.
+ *
+ * Returns @ret unchanged.
+ */
+static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
+				 int ret)
+{
+	struct ahash_request *req = ahash_request_cast(async_req);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	unsigned int mac_len;
+
+	mac_len = crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
+
+	if (!ret) {
+		/* Keep whatever was not hashed this round */
+		if (rctx->hash_rem)
+			scatterwalk_map_and_copy(rctx->buf, rctx->src,
+						 rctx->nbytes - rctx->hash_rem,
+						 rctx->hash_rem, 0);
+		rctx->buf_count = rctx->hash_rem;
+
+		/* Hand back the MAC only when one was asked for */
+		if (rctx->final && req->result)
+			memcpy(req->result, rctx->iv, mac_len);
+	}
+
+	sg_free_table(&rctx->data_sg);
+
+	return ret;
+}
+/*
+ * Common worker behind the CMAC update, final and finup entry points.
+ *
+ * @req:    hash request; req->src supplies the new data
+ * @nbytes: number of bytes of req->src to consume (0 for a bare final)
+ * @final:  non-zero when this call must produce the MAC
+ *
+ * Fails with -EINVAL when no key has been set. A non-final call whose
+ * buffered plus new data does not exceed one block is only copied into
+ * rctx->buf and returns 0 without queueing anything. Otherwise a scatterlist
+ * table of (buffered data, req->src, padding) is built and an AES command is
+ * queued; the return value is then that of ccp_crypto_enqueue_request().
+ * An sg_alloc_table() error is returned as-is; a failure to add an entry
+ * frees the table and returns -EINVAL.
+ */
+static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes,
+			      unsigned int final)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct scatterlist *sg, *cmac_key_sg = NULL;
+	unsigned int block_size =
+		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+	unsigned int need_pad, sg_count;
+	gfp_t gfp;
+	u64 len;
+	int ret;
+
+	if (!ctx->u.aes.key_len)
+		return -EINVAL;
+
+	if (nbytes)
+		rctx->null_msg = 0;
+
+	len = (u64)rctx->buf_count + (u64)nbytes;
+
+	if (!final && (len <= block_size)) {
+		scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src,
+					 0, nbytes, 0);
+		rctx->buf_count += nbytes;
+
+		return 0;
+	}
+
+	rctx->src = req->src;
+	rctx->nbytes = nbytes;
+
+	rctx->final = final;
+	rctx->hash_rem = final ? 0 : len & (block_size - 1);
+	rctx->hash_cnt = len - rctx->hash_rem;
+	if (!final && !rctx->hash_rem) {
+		/* CCP can't do zero length final, so keep some data around:
+		 * hold back one full block instead of hashing it now.
+		 */
+		rctx->hash_cnt -= block_size;
+		rctx->hash_rem = block_size;
+	}
+
+	if (final && (rctx->null_msg || (len & (block_size - 1))))
+		need_pad = 1;
+	else
+		need_pad = 0;
+
+	sg_init_one(&rctx->iv_sg, rctx->iv, sizeof(rctx->iv));
+
+	/* Build the data scatterlist table - allocate enough entries for all
+	 * possible data pieces (buffer, input data, padding); the "+ 2" below
+	 * accounts for the buffer and padding entries.
+	 */
+	sg_count = (nbytes) ? sg_nents(req->src) + 2 : 2;
+	gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+		GFP_KERNEL : GFP_ATOMIC;
+	ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp);
+	if (ret)
+		return ret;
+
+	sg = NULL;
+	if (rctx->buf_count) {
+		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
+	}
+
+	if (nbytes) {
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
+	}
+
+	if (need_pad) {
+		int pad_length = block_size - (len & (block_size - 1));
+
+		rctx->hash_cnt += pad_length;
+
+		memset(rctx->pad, 0, sizeof(rctx->pad));
+		rctx->pad[0] = 0x80;	/* a single 0x80 byte, then zeroes */
+		sg_init_one(&rctx->pad_sg, rctx->pad, pad_length);
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->pad_sg);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
+	}
+	if (sg) {
+		sg_mark_end(sg);
+		sg = rctx->data_sg.sgl;	/* source is the head of the table */
+	}
+
+	/* Pick the K1/K2 scatterlist: K2 when the last block was padded,
+	 * K1 otherwise; it stays NULL for a non-final pass.
+	 */
+	if (final)
+		cmac_key_sg = (need_pad) ? &ctx->u.aes.k2_sg
+					 : &ctx->u.aes.k1_sg;
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_AES;
+	rctx->cmd.u.aes.type = ctx->u.aes.type;
+	rctx->cmd.u.aes.mode = ctx->u.aes.mode;
+	rctx->cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
+	rctx->cmd.u.aes.key = &ctx->u.aes.key_sg;
+	rctx->cmd.u.aes.key_len = ctx->u.aes.key_len;
+	rctx->cmd.u.aes.iv = &rctx->iv_sg;
+	rctx->cmd.u.aes.iv_len = AES_BLOCK_SIZE;
+	rctx->cmd.u.aes.src = sg;
+	rctx->cmd.u.aes.src_len = rctx->hash_cnt;
+	rctx->cmd.u.aes.dst = NULL;
+	rctx->cmd.u.aes.cmac_key = cmac_key_sg;
+	rctx->cmd.u.aes.cmac_key_len = ctx->u.aes.kn_len;
+	rctx->cmd.u.aes.cmac_final = final;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+
+e_free:
+	sg_free_table(&rctx->data_sg);
+
+	return ret;
+}
+
+/*
+ * Begin a fresh CMAC computation: clear the whole request context and mark
+ * the message as empty until some data is supplied. Always returns 0.
+ */
+static int ccp_aes_cmac_init(struct ahash_request *req)
+{
+	struct ccp_aes_cmac_req_ctx *state = ahash_request_ctx(req);
+
+	memset(state, 0, sizeof(struct ccp_aes_cmac_req_ctx));
+	state->null_msg = 1;
+
+	return 0;
+}
+
+/* Absorb req->nbytes bytes of req->src without producing a MAC yet */
+static int ccp_aes_cmac_update(struct ahash_request *req)
+{
+	const unsigned int is_final = 0;
+
+	return ccp_do_cmac_update(req, req->nbytes, is_final);
+}
+
+/* Produce the MAC from what has been absorbed so far; no new input */
+static int ccp_aes_cmac_final(struct ahash_request *req)
+{
+	const unsigned int is_final = 1;
+
+	return ccp_do_cmac_update(req, 0, is_final);
+}
+
+/* Absorb req->nbytes bytes of req->src and produce the MAC in one step */
+static int ccp_aes_cmac_finup(struct ahash_request *req)
+{
+	const unsigned int is_final = 1;
+
+	return ccp_do_cmac_update(req, req->nbytes, is_final);
+}
+
+/* One-shot MAC: reset the request state, then run finup on req->src */
+static int ccp_aes_cmac_digest(struct ahash_request *req)
+{
+	int err = ccp_aes_cmac_init(req);
+
+	return err ? err : ccp_aes_cmac_finup(req);
+}
+
+/*
+ * Serialise the resumable part of the request state (empty-message flag,
+ * IV, buffered bytes and their count) into @out. Always returns 0.
+ */
+static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
+{
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_aes_cmac_exp_ctx snap;
+
+	/* Start from all-zero so no stray stack bytes reach 'out' */
+	memset(&snap, 0, sizeof(snap));
+
+	memcpy(snap.buf, rctx->buf, sizeof(snap.buf));
+	snap.buf_count = rctx->buf_count;
+	memcpy(snap.iv, rctx->iv, sizeof(snap.iv));
+	snap.null_msg = rctx->null_msg;
+
+	/* Copy via the local because 'out' is not guaranteed to be aligned */
+	memcpy(out, &snap, sizeof(snap));
+
+	return 0;
+}
+
+/*
+ * Rebuild the request context from a state blob at @in: the context is
+ * cleared, then the empty-message flag, IV, buffered bytes and their count
+ * are restored. Always returns 0.
+ */
+static int ccp_aes_cmac_import(struct ahash_request *req, const void *in)
+{
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_aes_cmac_exp_ctx snap;
+
+	/* Pull into a local first because 'in' may be unaligned */
+	memcpy(&snap, in, sizeof(snap));
+
+	memset(rctx, 0, sizeof(*rctx));
+
+	memcpy(rctx->buf, snap.buf, sizeof(rctx->buf));
+	rctx->buf_count = snap.buf_count;
+	memcpy(rctx->iv, snap.iv, sizeof(rctx->iv));
+	rctx->null_msg = snap.null_msg;
+
+	return 0;
+}
+
+/*
+ * Derive the next CMAC subkey: treat the 16 bytes at @in as a big-endian
+ * 128-bit value, shift it left by one bit and, when the top bit of @in was
+ * set, XOR 0x87 into the low byte. The result is stored big-endian at @out.
+ */
+static void ccp_aes_cmac_next_subkey(void *out, const void *in)
+{
+	const __be64 *src = (const __be64 *)in;
+	__be64 *dst = (__be64 *)out;
+	u64 hi = be64_to_cpu(src[0]);
+	u64 lo = be64_to_cpu(src[1]);
+	u64 res_hi = (hi << 1) | (lo >> 63);
+	u64 res_lo = lo << 1;
+
+	if (*(const u8 *)in & 0x80)
+		res_lo ^= 0x87;
+
+	dst[0] = cpu_to_be64(res_hi);
+	dst[1] = cpu_to_be64(res_lo);
+}
+
+/*
+ * Install a CMAC key: validate its length, derive the K1/K2 subkeys from the
+ * encryption of an all-zero block, and keep the raw key for the CCP.
+ *
+ * ctx->u.aes.key_len stays 0 until everything has succeeded, so a failed
+ * setkey leaves the transform unusable rather than half-configured.
+ *
+ * Returns 0 on success, -EINVAL for an unsupported key length, or the error
+ * from crypto_cipher_setkey().
+ */
+static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
+			       unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct ccp_crypto_ahash_alg *alg =
+		ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm));
+	int err;
+
+	if (key_len == AES_KEYSIZE_128) {
+		ctx->u.aes.type = CCP_AES_TYPE_128;
+	} else if (key_len == AES_KEYSIZE_192) {
+		ctx->u.aes.type = CCP_AES_TYPE_192;
+	} else if (key_len == AES_KEYSIZE_256) {
+		ctx->u.aes.type = CCP_AES_TYPE_256;
+	} else {
+		crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	ctx->u.aes.mode = alg->mode;
+
+	/* No usable key until the very end */
+	ctx->u.aes.key_len = 0;
+
+	/* The helper cipher produces the block the subkeys derive from */
+	err = crypto_cipher_setkey(ctx->u.aes.tfm_cipher, key, key_len);
+	if (err)
+		return err;
+
+	/* Encrypt an all-zero block in place in the context's key area */
+	memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key));
+	crypto_cipher_encrypt_one(ctx->u.aes.tfm_cipher, ctx->u.aes.key,
+				  ctx->u.aes.key);
+
+	/* K1 comes from the encrypted block, K2 from K1 */
+	ccp_aes_cmac_next_subkey(ctx->u.aes.k1, ctx->u.aes.key);
+	ccp_aes_cmac_next_subkey(ctx->u.aes.k2, ctx->u.aes.k1);
+
+	ctx->u.aes.kn_len = sizeof(ctx->u.aes.k1);
+	sg_init_one(&ctx->u.aes.k1_sg, ctx->u.aes.k1, sizeof(ctx->u.aes.k1));
+	sg_init_one(&ctx->u.aes.k2_sg, ctx->u.aes.k2, sizeof(ctx->u.aes.k2));
+
+	/* Replace the scratch block with the caller's key */
+	memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key));
+	memcpy(ctx->u.aes.key, key, key_len);
+	ctx->u.aes.key_len = key_len;
+	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
+
+	return 0;
+}
+
+/*
+ * Transform constructor: install the completion handler, mark the key as
+ * unset, size the per-request context and allocate the "aes" cipher that
+ * setkey uses to derive the subkeys.
+ *
+ * Returns 0 on success or the error from crypto_alloc_cipher().
+ */
+static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *aes;
+
+	ctx->complete = ccp_aes_cmac_complete;
+	ctx->u.aes.key_len = 0;
+
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct ccp_aes_cmac_req_ctx));
+
+	aes = crypto_alloc_cipher("aes", 0,
+				  CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	if (!IS_ERR(aes)) {
+		ctx->u.aes.tfm_cipher = aes;
+		return 0;
+	}
+
+	pr_warn("could not load aes cipher driver\n");
+
+	return PTR_ERR(aes);
+}
+
+/* Transform destructor: release the helper cipher, if one was allocated */
+static void ccp_aes_cmac_cra_exit(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_cipher *aes = ctx->u.aes.tfm_cipher;
+
+	ctx->u.aes.tfm_cipher = NULL;
+	if (aes)
+		crypto_free_cipher(aes);
+}
+
+/*
+ * Allocate, describe and register the CCP-backed "cmac(aes)" ahash.
+ * On success the new descriptor is linked onto @head.
+ *
+ * Returns 0 on success, -ENOMEM if the descriptor cannot be allocated, or
+ * the error from crypto_register_ahash() (the descriptor is freed then).
+ */
+int ccp_register_aes_cmac_algs(struct list_head *head)
+{
+	struct ccp_crypto_ahash_alg *entry;
+	struct ahash_alg *hash;
+	struct crypto_alg *core;
+	int err;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&entry->entry);
+	entry->mode = CCP_AES_MODE_CMAC;
+
+	hash = &entry->alg;
+	core = &hash->halg.base;
+
+	/* Hash entry points */
+	hash->init = ccp_aes_cmac_init;
+	hash->update = ccp_aes_cmac_update;
+	hash->final = ccp_aes_cmac_final;
+	hash->finup = ccp_aes_cmac_finup;
+	hash->digest = ccp_aes_cmac_digest;
+	hash->export = ccp_aes_cmac_export;
+	hash->import = ccp_aes_cmac_import;
+	hash->setkey = ccp_aes_cmac_setkey;
+
+	/* Output and exported-state sizes */
+	hash->halg.digestsize = AES_BLOCK_SIZE;
+	hash->halg.statesize = sizeof(struct ccp_aes_cmac_exp_ctx);
+
+	/* Generic algorithm description */
+	snprintf(core->cra_name, CRYPTO_MAX_ALG_NAME, "cmac(aes)");
+	snprintf(core->cra_driver_name, CRYPTO_MAX_ALG_NAME, "cmac-aes-ccp");
+	core->cra_flags = CRYPTO_ALG_ASYNC |
+			  CRYPTO_ALG_KERN_DRIVER_ONLY |
+			  CRYPTO_ALG_NEED_FALLBACK;
+	core->cra_blocksize = AES_BLOCK_SIZE;
+	core->cra_ctxsize = sizeof(struct ccp_ctx);
+	core->cra_priority = CCP_CRA_PRIORITY;
+	core->cra_init = ccp_aes_cmac_cra_init;
+	core->cra_exit = ccp_aes_cmac_cra_exit;
+	core->cra_module = THIS_MODULE;
+
+	err = crypto_register_ahash(hash);
+	if (!err) {
+		list_add(&entry->entry, head);
+		return 0;
+	}
+
+	pr_err("%s ahash algorithm registration error (%d)\n",
+	       core->cra_name, err);
+	kfree(entry);
+
+	return err;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
new file mode 100644
index 000000000..e5dcb29b6
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
@@ -0,0 +1,264 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) AES GCM crypto API support
+ *
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/internal/aead.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/ctr.h>
+#include <crypto/gcm.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+static int ccp_aes_gcm_complete(struct crypto_async_request *async_req, int ret)
+{
+	return ret;	/* nothing to post-process: pass the status through */
+}
+
+/*
+ * Install an AES-GCM key: record the AES type for the key length, switch the
+ * context to GCM mode and keep a copy of the key with a scatterlist over it.
+ *
+ * Returns 0 on success or -EINVAL (with CRYPTO_TFM_RES_BAD_KEY_LEN flagged
+ * on @tfm) for any length other than 128, 192 or 256 bits.
+ */
+static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+			      unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
+	    key_len != AES_KEYSIZE_256) {
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	if (key_len == AES_KEYSIZE_128)
+		ctx->u.aes.type = CCP_AES_TYPE_128;
+	else if (key_len == AES_KEYSIZE_192)
+		ctx->u.aes.type = CCP_AES_TYPE_192;
+	else
+		ctx->u.aes.type = CCP_AES_TYPE_256;
+
+	ctx->u.aes.key_len = key_len;
+	ctx->u.aes.mode = CCP_AES_MODE_GCM;
+
+	memcpy(ctx->u.aes.key, key, key_len);
+	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
+
+	return 0;
+}
+
+/*
+ * Validate a requested authentication tag length.
+ * Accepted sizes are 4, 8 and 12 through 16 bytes; anything else yields
+ * -EINVAL. Nothing is stored here.
+ */
+static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm,
+				   unsigned int authsize)
+{
+	if (authsize == 4 || authsize == 8 ||
+	    (authsize >= 12 && authsize <= 16))
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ * Queue one AES-GCM operation on the CCP.
+ *
+ * @req:     AEAD request; req->src carries the AAD followed by the text and
+ *           req->dst receives the text plus the tag
+ * @encrypt: true to encrypt, false to decrypt
+ *
+ * Returns -EINVAL when no key is set, the context is not in GCM mode or the
+ * request has no IV; otherwise the result of ccp_crypto_enqueue_request().
+ */
+static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
+	struct ccp_aes_req_ctx *rctx = aead_request_ctx(req);
+	unsigned int i;
+
+	if (!ctx->u.aes.key_len)
+		return -EINVAL;
+
+	if (ctx->u.aes.mode != CCP_AES_MODE_GCM)
+		return -EINVAL;
+
+	if (!req->iv)
+		return -EINVAL;
+
+	/*
+	 * 5 parts:
+	 *   plaintext/ciphertext input
+	 *   AAD
+	 *   key
+	 *   IV
+	 *   Destination+tag buffer
+	 */
+
+	/* Prepare the IV: the caller's GCM_AES_IV_SIZE bytes followed by a
+	 * counter that fills the rest of the block and starts at 1
+	 */
+	memcpy(rctx->iv, req->iv, GCM_AES_IV_SIZE);
+	for (i = GCM_AES_IV_SIZE; i < AES_BLOCK_SIZE - 1; i++)
+		rctx->iv[i] = 0;
+	rctx->iv[AES_BLOCK_SIZE - 1] = 1;
+
+	/* Set up a scatterlist for the IV */
+	sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE);
+
+	/* The AAD + plaintext are concatenated in the src buffer */
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_AES;
+	rctx->cmd.u.aes.authsize = crypto_aead_authsize(tfm);
+	rctx->cmd.u.aes.type = ctx->u.aes.type;
+	rctx->cmd.u.aes.mode = ctx->u.aes.mode;
+	/* Map the flag explicitly instead of relying on the numeric values
+	 * of the action constants
+	 */
+	rctx->cmd.u.aes.action = encrypt ? CCP_AES_ACTION_ENCRYPT
+					 : CCP_AES_ACTION_DECRYPT;
+	rctx->cmd.u.aes.key = &ctx->u.aes.key_sg;
+	rctx->cmd.u.aes.key_len = ctx->u.aes.key_len;
+	rctx->cmd.u.aes.iv = &rctx->iv_sg;
+	rctx->cmd.u.aes.iv_len = AES_BLOCK_SIZE;
+	rctx->cmd.u.aes.src = req->src;
+	rctx->cmd.u.aes.src_len = req->cryptlen;
+	rctx->cmd.u.aes.aad_len = req->assoclen;
+
+	/* The cipher text + the tag are in the dst buffer */
+	rctx->cmd.u.aes.dst = req->dst;
+
+	return ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+}
+
+/* AEAD .encrypt hook */
+static int ccp_aes_gcm_encrypt(struct aead_request *req)
+{
+	return ccp_aes_gcm_crypt(req, true);
+}
+
+/* AEAD .decrypt hook */
+static int ccp_aes_gcm_decrypt(struct aead_request *req)
+{
+	return ccp_aes_gcm_crypt(req, false);
+}
+
+/*
+ * AEAD constructor: size the per-request context, install the completion
+ * handler and mark the key as unset. Always returns 0.
+ */
+static int ccp_aes_gcm_cra_init(struct crypto_aead *tfm)
+{
+	struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
+
+	crypto_aead_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx));
+
+	ctx->u.aes.key_len = 0;
+	ctx->complete = ccp_aes_gcm_complete;
+
+	return 0;
+}
+
+/* Destructor hook; intentionally a no-op */
+static void ccp_aes_gcm_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+/*
+ * Template for the CCP AES-GCM AEAD. It is copied at registration time and
+ * the names and block size are then overridden per algorithm definition.
+ *
+ * No algorithm type is set here: this is an aead_alg, and
+ * crypto_register_aead() installs the AEAD type and type flag itself, so
+ * claiming to be an ablkcipher was both wrong and without effect.
+ */
+static struct aead_alg ccp_aes_gcm_defaults = {
+	.setkey = ccp_aes_gcm_setkey,
+	.setauthsize = ccp_aes_gcm_setauthsize,
+	.encrypt = ccp_aes_gcm_encrypt,
+	.decrypt = ccp_aes_gcm_decrypt,
+	.init = ccp_aes_gcm_cra_init,
+	.ivsize = GCM_AES_IV_SIZE,
+	.maxauthsize = AES_BLOCK_SIZE,
+	.base = {
+		.cra_flags	= CRYPTO_ALG_ASYNC |
+				  CRYPTO_ALG_KERN_DRIVER_ONLY |
+				  CRYPTO_ALG_NEED_FALLBACK,
+		.cra_blocksize	= AES_BLOCK_SIZE,
+		.cra_ctxsize	= sizeof(struct ccp_ctx),
+		.cra_priority	= CCP_CRA_PRIORITY,
+		.cra_exit	= ccp_aes_gcm_cra_exit,
+		.cra_module	= THIS_MODULE,
+	},
+};
+
+struct ccp_aes_aead_def {
+	enum ccp_aes_mode mode;		/* copied to ccp_crypto_aead.mode */
+	unsigned int version;		/* lowest CCP version that gets this alg */
+	const char *name;		/* becomes cra_name */
+	const char *driver_name;	/* becomes cra_driver_name */
+	unsigned int blocksize;		/* becomes cra_blocksize */
+	unsigned int ivsize;		/* IV size recorded for this entry */
+	struct aead_alg *alg_defaults;	/* template copied before overrides */
+};
+
+static struct ccp_aes_aead_def aes_aead_algs[] = {
+	{
+		.mode		= CCP_AES_MODE_GHASH,
+		.version	= CCP_VERSION(5, 0),	/* skipped on older CCPs */
+		.name		= "gcm(aes)",
+		.driver_name	= "gcm-aes-ccp",
+		.blocksize	= 1,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_gcm_defaults,
+	},
+};
+
+/*
+ * Register one AEAD algorithm described by @def and link it onto @head.
+ *
+ * The descriptor starts as a copy of def->alg_defaults; only the names and
+ * the block size are overridden. The AEAD IV size is the one carried by the
+ * copied defaults (alg->ivsize): the legacy cra_ablkcipher union member has
+ * no meaning for an aead_alg and is therefore not written.
+ *
+ * Returns 0 on success, -ENOMEM if the descriptor cannot be allocated, or
+ * the error from crypto_register_aead() (the descriptor is freed then).
+ */
+static int ccp_register_aes_aead(struct list_head *head,
+				 const struct ccp_aes_aead_def *def)
+{
+	struct ccp_crypto_aead *ccp_aead;
+	struct aead_alg *alg;
+	int ret;
+
+	ccp_aead = kzalloc(sizeof(*ccp_aead), GFP_KERNEL);
+	if (!ccp_aead)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_aead->entry);
+
+	ccp_aead->mode = def->mode;
+
+	/* Copy the defaults and override as necessary */
+	alg = &ccp_aead->alg;
+	*alg = *def->alg_defaults;
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->driver_name);
+	alg->base.cra_blocksize = def->blocksize;
+
+	ret = crypto_register_aead(alg);
+	if (ret) {
+		pr_err("%s aead algorithm registration error (%d)\n",
+		       alg->base.cra_name, ret);
+		kfree(ccp_aead);
+		return ret;
+	}
+
+	list_add(&ccp_aead->entry, head);
+
+	return 0;
+}
+
+/*
+ * Register every AEAD in aes_aead_algs[] that the present CCP version
+ * supports, linking each onto @head.
+ *
+ * Returns 0 when all eligible entries registered, or the first error from
+ * ccp_register_aes_aead(); entries registered before the failure stay on
+ * @head.
+ */
+int ccp_register_aes_aeads(struct list_head *head)
+{
+	const unsigned int hw_version = ccp_version();
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(aes_aead_algs); idx++) {
+		const struct ccp_aes_aead_def *def = &aes_aead_algs[idx];
+		int err;
+
+		/* Only offer algorithms the hardware is new enough for */
+		if (def->version <= hw_version) {
+			err = ccp_register_aes_aead(head, def);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
new file mode 100644
index 000000000..94b5bcf5b
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -0,0 +1,291 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) AES XTS crypto API support
+ *
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <crypto/aes.h>
+#include <crypto/xts.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+/* Name pair used to register one XTS algorithm */
+struct ccp_aes_xts_def {
+	const char *name;	/* becomes cra_name */
+	const char *drv_name;	/* becomes cra_driver_name */
+};
+
+static struct ccp_aes_xts_def aes_xts_algs[] = {
+	{ .name = "xts(aes)", .drv_name = "xts-aes-ccp" },
+};
+
+/* Pairs a request length in bytes with a CCP XTS unit-size constant */
+struct ccp_unit_size_map {
+	unsigned int size;
+	u32 value;
+};
+
+static struct ccp_unit_size_map xts_unit_sizes[] = {
+	{ .size = 16,   .value = CCP_XTS_AES_UNIT_SIZE_16 },
+	{ .size = 512,  .value = CCP_XTS_AES_UNIT_SIZE_512 },
+	{ .size = 1024, .value = CCP_XTS_AES_UNIT_SIZE_1024 },
+	{ .size = 2048, .value = CCP_XTS_AES_UNIT_SIZE_2048 },
+	{ .size = 4096, .value = CCP_XTS_AES_UNIT_SIZE_4096 },
+};
+
+/*
+ * Completion callback for an XTS request: on success, copy the IV held in
+ * the request context back into the caller's IV buffer. Returns @ret.
+ */
+static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+
+	if (!ret)
+		memcpy(req->info, rctx->iv, AES_BLOCK_SIZE);
+
+	return ret;
+}
+
+/*
+ * Install an XTS key (two concatenated AES keys).
+ *
+ * The key is copied for the CCP only when the hardware can use it: always
+ * for 2 x 128 bits, and for 2 x 256 bits only on devices newer than
+ * version 3. The per-half key length is recorded in every case and the key
+ * is always handed to the fallback skcipher.
+ *
+ * Returns the error from xts_check_key() if the key is rejected, otherwise
+ * the result of crypto_skcipher_setkey() on the fallback.
+ */
+static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			      unsigned int key_len)
+{
+	struct crypto_tfm *base = crypto_ablkcipher_tfm(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(base);
+	unsigned int ccpversion = ccp_version();
+	bool hw_usable;
+	int err;
+
+	err = xts_check_key(base, key, key_len);
+	if (err)
+		return err;
+
+	hw_usable = key_len == AES_KEYSIZE_128 * 2 ||
+		    (key_len == AES_KEYSIZE_256 * 2 &&
+		     ccpversion > CCP_VERSION(3, 0));
+	if (hw_usable)
+		memcpy(ctx->u.aes.key, key, key_len);
+
+	ctx->u.aes.key_len = key_len / 2;
+	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
+
+	return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len);
+}
+
+/*
+ * Run one XTS operation, on the CCP when possible and on the software
+ * fallback otherwise.
+ *
+ * @req:     block cipher request (req->info is the IV)
+ * @encrypt: non-zero to encrypt, zero to decrypt
+ *
+ * Returns -EINVAL when no key is set, the length is not a multiple of the
+ * AES block size or there is no IV. When the fallback is used its result is
+ * returned; otherwise the result of ccp_crypto_enqueue_request().
+ */
+static int ccp_aes_xts_crypt(struct ablkcipher_request *req,
+			     unsigned int encrypt)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	unsigned int ccpversion = ccp_version();
+	unsigned int fallback = 0;
+	unsigned int unit;
+	u32 unit_size;
+	int ret;
+
+	if (!ctx->u.aes.key_len)
+		return -EINVAL;
+
+	if (req->nbytes & (AES_BLOCK_SIZE - 1))
+		return -EINVAL;
+
+	if (!req->info)
+		return -EINVAL;
+
+	/* Check conditions under which the CCP can fulfill a request. The
+	 * device can handle input plaintext of a length that is a multiple
+	 * of the unit_size, but the crypto implementation only supports
+	 * the unit_size being equal to the input length. This limits the
+	 * number of scenarios we can handle.
+	 */
+	unit_size = CCP_XTS_AES_UNIT_SIZE__LAST;
+	for (unit = 0; unit < ARRAY_SIZE(xts_unit_sizes); unit++) {
+		if (req->nbytes == xts_unit_sizes[unit].size) {
+			/* Take the selector from the table rather than
+			 * assuming the table order matches the constants
+			 */
+			unit_size = xts_unit_sizes[unit].value;
+			break;
+		}
+	}
+	/* The CCP has restrictions on block sizes. Also, a version 3 device
+	 * only supports AES-128 operations; version 5 CCPs support both
+	 * AES-128 and -256 operations.
+	 */
+	if (unit_size == CCP_XTS_AES_UNIT_SIZE__LAST)
+		fallback = 1;
+	if ((ccpversion < CCP_VERSION(5, 0)) &&
+	    (ctx->u.aes.key_len != AES_KEYSIZE_128))
+		fallback = 1;
+	if ((ctx->u.aes.key_len != AES_KEYSIZE_128) &&
+	    (ctx->u.aes.key_len != AES_KEYSIZE_256))
+		fallback = 1;
+	if (fallback) {
+		SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher);
+
+		/* Use the fallback to process the request for any
+		 * unsupported unit sizes or key sizes
+		 */
+		skcipher_request_set_tfm(subreq, ctx->u.aes.tfm_skcipher);
+		skcipher_request_set_callback(subreq, req->base.flags,
+					      NULL, NULL);
+		skcipher_request_set_crypt(subreq, req->src, req->dst,
+					   req->nbytes, req->info);
+		ret = encrypt ? crypto_skcipher_encrypt(subreq) :
+				crypto_skcipher_decrypt(subreq);
+		skcipher_request_zero(subreq);
+		return ret;
+	}
+
+	/* Work on a private copy of the IV */
+	memcpy(rctx->iv, req->info, AES_BLOCK_SIZE);
+	sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE);
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_XTS_AES_128;
+	rctx->cmd.u.xts.type = CCP_AES_TYPE_128;
+	rctx->cmd.u.xts.action = (encrypt) ? CCP_AES_ACTION_ENCRYPT
+					   : CCP_AES_ACTION_DECRYPT;
+	rctx->cmd.u.xts.unit_size = unit_size;
+	rctx->cmd.u.xts.key = &ctx->u.aes.key_sg;
+	rctx->cmd.u.xts.key_len = ctx->u.aes.key_len;
+	rctx->cmd.u.xts.iv = &rctx->iv_sg;
+	rctx->cmd.u.xts.iv_len = AES_BLOCK_SIZE;
+	rctx->cmd.u.xts.src = req->src;
+	rctx->cmd.u.xts.src_len = req->nbytes;
+	rctx->cmd.u.xts.dst = req->dst;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+
+	return ret;
+}
+
+/* ablkcipher .encrypt hook */
+static int ccp_aes_xts_encrypt(struct ablkcipher_request *req)
+{
+	return ccp_aes_xts_crypt(req, 1);
+}
+
+/* ablkcipher .decrypt hook */
+static int ccp_aes_xts_decrypt(struct ablkcipher_request *req)
+{
+	return ccp_aes_xts_crypt(req, 0);
+}
+
+/*
+ * Transform constructor: install the completion handler, mark the key as
+ * unset, allocate the "xts(aes)" fallback and size the request context.
+ *
+ * Returns 0 on success or the error from crypto_alloc_skcipher().
+ */
+static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct crypto_skcipher *sw;
+
+	ctx->u.aes.key_len = 0;
+	ctx->complete = ccp_aes_xts_complete;
+
+	sw = crypto_alloc_skcipher("xts(aes)", 0,
+				   CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
+	if (!IS_ERR(sw)) {
+		ctx->u.aes.tfm_skcipher = sw;
+		tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
+		return 0;
+	}
+
+	pr_warn("could not load fallback driver xts(aes)\n");
+
+	return PTR_ERR(sw);
+}
+
+/* Transform destructor: release the fallback skcipher */
+static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_skcipher(xts_ctx->u.aes.tfm_skcipher);
+}
+
+/*
+ * Allocate, describe and register one XTS ablkcipher named by @def, then
+ * link it onto @head.
+ *
+ * Returns 0 on success, -ENOMEM if the descriptor cannot be allocated, or
+ * the error from crypto_register_alg() (the descriptor is freed then).
+ */
+static int ccp_register_aes_xts_alg(struct list_head *head,
+				    const struct ccp_aes_xts_def *def)
+{
+	struct ccp_crypto_ablkcipher_alg *entry;
+	struct crypto_alg *core;
+	int err;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&entry->entry);
+	core = &entry->alg;
+
+	/* Identity and generic properties */
+	snprintf(core->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(core->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->drv_name);
+	core->cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
+			  CRYPTO_ALG_KERN_DRIVER_ONLY |
+			  CRYPTO_ALG_NEED_FALLBACK;
+	core->cra_blocksize = AES_BLOCK_SIZE;
+	core->cra_ctxsize = sizeof(struct ccp_ctx);
+	core->cra_priority = CCP_CRA_PRIORITY;
+	core->cra_type = &crypto_ablkcipher_type;
+	core->cra_init = ccp_aes_xts_cra_init;
+	core->cra_exit = ccp_aes_xts_cra_exit;
+	core->cra_module = THIS_MODULE;
+
+	/* Cipher operations and limits (XTS keys are two AES keys) */
+	core->cra_ablkcipher.setkey = ccp_aes_xts_setkey;
+	core->cra_ablkcipher.encrypt = ccp_aes_xts_encrypt;
+	core->cra_ablkcipher.decrypt = ccp_aes_xts_decrypt;
+	core->cra_ablkcipher.min_keysize = AES_MIN_KEY_SIZE * 2;
+	core->cra_ablkcipher.max_keysize = AES_MAX_KEY_SIZE * 2;
+	core->cra_ablkcipher.ivsize = AES_BLOCK_SIZE;
+
+	err = crypto_register_alg(core);
+	if (!err) {
+		list_add(&entry->entry, head);
+		return 0;
+	}
+
+	pr_err("%s ablkcipher algorithm registration error (%d)\n",
+	       core->cra_name, err);
+	kfree(entry);
+
+	return err;
+}
+
+/*
+ * Register every entry of aes_xts_algs[], linking each onto @head.
+ * Returns 0 on success or the first error from ccp_register_aes_xts_alg().
+ */
+int ccp_register_aes_xts_algs(struct list_head *head)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(aes_xts_algs); idx++) {
+		int err = ccp_register_aes_xts_alg(head, &aes_xts_algs[idx]);
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
new file mode 100644
index 000000000..3f7686993
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -0,0 +1,378 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Cryptographic Coprocessor (CCP) AES crypto API support
+ *
+ * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/ctr.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+/*
+ * Completion hook for the AES ablkcipher modes.  A non-zero @ret is handed
+ * straight back.  On success, for every mode except ECB, the IV kept in the
+ * request context is copied back into req->info.
+ */
+static int ccp_aes_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+
+	if (!ret && ctx->u.aes.mode != CCP_AES_MODE_ECB)
+		memcpy(req->info, rctx->iv, AES_BLOCK_SIZE);
+
+	return ret;
+}
+
+/*
+ * setkey hook for the AES ablkcipher modes.  Accepts AES_KEYSIZE_128/192/256
+ * keys, records the matching CCP key type together with the algorithm's
+ * mode, and keeps a copy of the key in the tfm context behind a one-entry
+ * scatterlist.  Any other length sets CRYPTO_TFM_RES_BAD_KEY_LEN and
+ * returns -EINVAL without touching the context.
+ */
+static int ccp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+			  unsigned int key_len)
+{
+	struct crypto_tfm *base = crypto_ablkcipher_tfm(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(base);
+	struct ccp_crypto_ablkcipher_alg *alg = ccp_crypto_ablkcipher_alg(base);
+
+	if (key_len == AES_KEYSIZE_128) {
+		ctx->u.aes.type = CCP_AES_TYPE_128;
+	} else if (key_len == AES_KEYSIZE_192) {
+		ctx->u.aes.type = CCP_AES_TYPE_192;
+	} else if (key_len == AES_KEYSIZE_256) {
+		ctx->u.aes.type = CCP_AES_TYPE_256;
+	} else {
+		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	ctx->u.aes.key_len = key_len;
+	ctx->u.aes.mode = alg->mode;
+
+	/* Private copy of the key, wrapped in a scatterlist */
+	memcpy(ctx->u.aes.key, key, key_len);
+	sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len);
+
+	return 0;
+}
+
+/*
+ * Common encrypt/decrypt path for the AES ablkcipher modes: validate the
+ * request, stage the IV (every mode but ECB), build the CCP command inside
+ * the request context and queue it.
+ *
+ * Returns -EINVAL when no key has been set, when an ECB or CBC request is
+ * not a multiple of AES_BLOCK_SIZE, or when a required IV pointer is NULL;
+ * otherwise the result of ccp_crypto_enqueue_request().
+ */
+static int ccp_aes_crypt(struct ablkcipher_request *req, bool encrypt)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct ccp_cmd *cmd = &rctx->cmd;
+	bool needs_iv = ctx->u.aes.mode != CCP_AES_MODE_ECB;
+	bool whole_blocks_only;
+
+	if (!ctx->u.aes.key_len)
+		return -EINVAL;
+
+	whole_blocks_only = ctx->u.aes.mode == CCP_AES_MODE_ECB ||
+			    ctx->u.aes.mode == CCP_AES_MODE_CBC;
+	if (whole_blocks_only && (req->nbytes & (AES_BLOCK_SIZE - 1)))
+		return -EINVAL;
+
+	if (needs_iv) {
+		if (!req->info)
+			return -EINVAL;
+
+		/* Work on a private copy of the caller's IV */
+		memcpy(rctx->iv, req->info, AES_BLOCK_SIZE);
+		sg_init_one(&rctx->iv_sg, rctx->iv, AES_BLOCK_SIZE);
+	}
+
+	/* Start from a clean command, then describe the operation */
+	memset(cmd, 0, sizeof(*cmd));
+	INIT_LIST_HEAD(&cmd->entry);
+	cmd->engine = CCP_ENGINE_AES;
+	cmd->u.aes.type = ctx->u.aes.type;
+	cmd->u.aes.mode = ctx->u.aes.mode;
+	if (encrypt)
+		cmd->u.aes.action = CCP_AES_ACTION_ENCRYPT;
+	else
+		cmd->u.aes.action = CCP_AES_ACTION_DECRYPT;
+	cmd->u.aes.key = &ctx->u.aes.key_sg;
+	cmd->u.aes.key_len = ctx->u.aes.key_len;
+	cmd->u.aes.iv = needs_iv ? &rctx->iv_sg : NULL;
+	cmd->u.aes.iv_len = needs_iv ? AES_BLOCK_SIZE : 0;
+	cmd->u.aes.src = req->src;
+	cmd->u.aes.src_len = req->nbytes;
+	cmd->u.aes.dst = req->dst;
+
+	return ccp_crypto_enqueue_request(&req->base, cmd);
+}
+
+/* .encrypt hook: run the common AES path as an encryption. */
+static int ccp_aes_encrypt(struct ablkcipher_request *req)
+{
+	const bool encrypt = true;
+
+	return ccp_aes_crypt(req, encrypt);
+}
+
+/* .decrypt hook: run the common AES path as a decryption. */
+static int ccp_aes_decrypt(struct ablkcipher_request *req)
+{
+	const bool encrypt = false;
+
+	return ccp_aes_crypt(req, encrypt);
+}
+
+/*
+ * cra_init hook for the AES ablkcipher modes: size the per-request context,
+ * mark the tfm as having no key yet (key_len == 0 makes ccp_aes_crypt()
+ * reject requests) and install the completion hook.  Always returns 0.
+ */
+static int ccp_aes_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *aes_ctx = crypto_tfm_ctx(tfm);
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
+
+	aes_ctx->u.aes.key_len = 0;
+	aes_ctx->complete = ccp_aes_complete;
+
+	return 0;
+}
+
+/* cra_exit hook: empty, ccp_aes_cra_init() only sets context fields. */
+static void ccp_aes_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+/*
+ * Completion hook for rfc3686(ctr(aes)).
+ *
+ * ccp_aes_rfc3686_crypt() pointed req->info at the driver's own counter
+ * block (rctx->rfc3686_iv, at least AES_BLOCK_SIZE bytes) and saved the
+ * caller's pointer in rctx->rfc3686_info.  The caller's buffer only has to
+ * hold CTR_RFC3686_IV_SIZE bytes (the ivsize registered for this algorithm),
+ * while ccp_aes_complete() copies AES_BLOCK_SIZE bytes to req->info.  So the
+ * common completion must run while req->info still refers to the private
+ * counter block; only afterwards is the caller's pointer put back.  The
+ * caller's IV is therefore left untouched.
+ *
+ * Returns the value produced by ccp_aes_complete().
+ */
+static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req,
+				    int ret)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+
+	/* IV write-back lands in rctx->rfc3686_iv, not in the caller's IV */
+	ret = ccp_aes_complete(async_req, ret);
+
+	/* Restore the original pointer */
+	req->info = rctx->rfc3686_info;
+
+	return ret;
+}
+
+/*
+ * setkey hook for rfc3686(ctr(aes)).  The last CTR_RFC3686_NONCE_SIZE bytes
+ * of @key are stored as the nonce; the bytes before them are handed to
+ * ccp_aes_setkey() as the AES key.  Returns -EINVAL when @key is too short
+ * to contain a nonce, otherwise the result of ccp_aes_setkey().
+ */
+static int ccp_aes_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+				  unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
+	unsigned int aes_key_len;
+
+	if (key_len < CTR_RFC3686_NONCE_SIZE)
+		return -EINVAL;
+
+	aes_key_len = key_len - CTR_RFC3686_NONCE_SIZE;
+	memcpy(ctx->u.aes.nonce, &key[aes_key_len], CTR_RFC3686_NONCE_SIZE);
+
+	return ccp_aes_setkey(tfm, key, aes_key_len);
+}
+
+/*
+ * Common encrypt/decrypt path for rfc3686(ctr(aes)).
+ *
+ * Builds the counter block in rctx->rfc3686_iv as
+ *   nonce (CTR_RFC3686_NONCE_SIZE) | caller IV (CTR_RFC3686_IV_SIZE) |
+ *   32-bit big-endian counter starting at 1
+ * then temporarily points req->info at it and runs the generic AES path.
+ * The caller's pointer is kept in rctx->rfc3686_info so that the completion
+ * hook can put it back.
+ *
+ * Returns -EINVAL when req->info is NULL, otherwise the result of
+ * ccp_aes_crypt().
+ */
+static int ccp_aes_rfc3686_crypt(struct ablkcipher_request *req, bool encrypt)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req);
+	u8 *iv;
+	int ret;
+
+	/* req->info is read below; ccp_aes_crypt() would only ever see the
+	 * substituted pointer, so its own NULL check cannot catch this.
+	 */
+	if (!req->info)
+		return -EINVAL;
+
+	/* Initialize the CTR block */
+	iv = rctx->rfc3686_iv;
+	memcpy(iv, ctx->u.aes.nonce, CTR_RFC3686_NONCE_SIZE);
+
+	iv += CTR_RFC3686_NONCE_SIZE;
+	memcpy(iv, req->info, CTR_RFC3686_IV_SIZE);
+
+	iv += CTR_RFC3686_IV_SIZE;
+	*(__be32 *)iv = cpu_to_be32(1);
+
+	/* Point to the new IV */
+	rctx->rfc3686_info = req->info;
+	req->info = rctx->rfc3686_iv;
+
+	ret = ccp_aes_crypt(req, encrypt);
+
+	/* -EINPROGRESS and -EBUSY mean the request was queued and the
+	 * completion hook will restore the pointer.  Any other result means
+	 * the request was not queued, so give the caller its IV pointer back
+	 * here; otherwise a resubmission would read the IV out of the
+	 * private counter block.
+	 */
+	if (ret != -EINPROGRESS && ret != -EBUSY)
+		req->info = rctx->rfc3686_info;
+
+	return ret;
+}
+
+/* .encrypt hook: run the rfc3686 path as an encryption. */
+static int ccp_aes_rfc3686_encrypt(struct ablkcipher_request *req)
+{
+	const bool encrypt = true;
+
+	return ccp_aes_rfc3686_crypt(req, encrypt);
+}
+
+/* .decrypt hook: run the rfc3686 path as a decryption. */
+static int ccp_aes_rfc3686_decrypt(struct ablkcipher_request *req)
+{
+	const bool encrypt = false;
+
+	return ccp_aes_rfc3686_crypt(req, encrypt);
+}
+
+/*
+ * cra_init hook for rfc3686(ctr(aes)): size the per-request context, mark
+ * the tfm as having no key yet and install the rfc3686 completion hook.
+ * Always returns 0.
+ */
+static int ccp_aes_rfc3686_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *aes_ctx = crypto_tfm_ctx(tfm);
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx);
+
+	aes_ctx->u.aes.key_len = 0;
+	aes_ctx->complete = ccp_aes_rfc3686_complete;
+
+	return 0;
+}
+
+/* cra_exit hook: empty, ccp_aes_rfc3686_cra_init() only sets context fields. */
+static void ccp_aes_rfc3686_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+/*
+ * Template crypto_alg for the plain AES modes.  ccp_register_aes_alg()
+ * copies it and then overrides the names, the block size and the IV size
+ * from the matching aes_algs entry.
+ */
+static struct crypto_alg ccp_aes_defaults = {
+	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
+			  CRYPTO_ALG_ASYNC |
+			  CRYPTO_ALG_KERN_DRIVER_ONLY |
+			  CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize	= AES_BLOCK_SIZE,
+	.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.cra_priority	= CCP_CRA_PRIORITY,
+	.cra_type	= &crypto_ablkcipher_type,
+	.cra_init	= ccp_aes_cra_init,
+	.cra_exit	= ccp_aes_cra_exit,
+	.cra_module	= THIS_MODULE,
+	.cra_ablkcipher	= {
+		.setkey		= ccp_aes_setkey,
+		.encrypt	= ccp_aes_encrypt,
+		.decrypt	= ccp_aes_decrypt,
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+	},
+};
+
+/*
+ * Template crypto_alg for rfc3686(ctr(aes)).  Same role as the plain AES
+ * template, but with the rfc3686 hooks and key-size limits that include the
+ * CTR_RFC3686_NONCE_SIZE bytes appended to the AES key.
+ */
+static struct crypto_alg ccp_aes_rfc3686_defaults = {
+	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
+			   CRYPTO_ALG_ASYNC |
+			   CRYPTO_ALG_KERN_DRIVER_ONLY |
+			   CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize	= CTR_RFC3686_BLOCK_SIZE,
+	.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.cra_priority	= CCP_CRA_PRIORITY,
+	.cra_type	= &crypto_ablkcipher_type,
+	.cra_init	= ccp_aes_rfc3686_cra_init,
+	.cra_exit	= ccp_aes_rfc3686_cra_exit,
+	.cra_module	= THIS_MODULE,
+	.cra_ablkcipher	= {
+		.setkey		= ccp_aes_rfc3686_setkey,
+		.encrypt	= ccp_aes_rfc3686_encrypt,
+		.decrypt	= ccp_aes_rfc3686_decrypt,
+		.min_keysize	= AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+	},
+};
+
+/*
+ * Description of one AES algorithm to register.
+ *
+ * @mode:         CCP AES mode stored in the registered algorithm entry
+ * @version:      CCP version compared against ccp_version(); entries with a
+ *                larger value are skipped at registration time
+ * @name:         value for cra_name
+ * @driver_name:  value for cra_driver_name
+ * @blocksize:    value for cra_blocksize
+ * @ivsize:       value for cra_ablkcipher.ivsize
+ * @alg_defaults: template crypto_alg copied before the overrides above
+ */
+struct ccp_aes_def {
+	enum ccp_aes_mode mode;
+	unsigned int version;
+	const char *name;
+	const char *driver_name;
+	unsigned int blocksize;
+	unsigned int ivsize;
+	struct crypto_alg *alg_defaults;
+};
+
+/*
+ * AES algorithms offered by this file.  The table is only ever read (the
+ * registration helper takes a const pointer to each entry), so it is
+ * const-qualified.
+ */
+static const struct ccp_aes_def aes_algs[] = {
+	{
+		.mode		= CCP_AES_MODE_ECB,
+		.version	= CCP_VERSION(3, 0),
+		.name		= "ecb(aes)",
+		.driver_name	= "ecb-aes-ccp",
+		.blocksize	= AES_BLOCK_SIZE,
+		.ivsize		= 0,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_CBC,
+		.version	= CCP_VERSION(3, 0),
+		.name		= "cbc(aes)",
+		.driver_name	= "cbc-aes-ccp",
+		.blocksize	= AES_BLOCK_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_CFB,
+		.version	= CCP_VERSION(3, 0),
+		.name		= "cfb(aes)",
+		.driver_name	= "cfb-aes-ccp",
+		.blocksize	= 1,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_OFB,
+		.version	= CCP_VERSION(3, 0),
+		.name		= "ofb(aes)",
+		.driver_name	= "ofb-aes-ccp",
+		.blocksize	= 1,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		.mode		= CCP_AES_MODE_CTR,
+		.version	= CCP_VERSION(3, 0),
+		.name		= "ctr(aes)",
+		.driver_name	= "ctr-aes-ccp",
+		.blocksize	= 1,
+		.ivsize		= AES_BLOCK_SIZE,
+		.alg_defaults	= &ccp_aes_defaults,
+	},
+	{
+		/* Same CCP mode as ctr(aes); differs in hooks and IV size */
+		.mode		= CCP_AES_MODE_CTR,
+		.version	= CCP_VERSION(3, 0),
+		.name		= "rfc3686(ctr(aes))",
+		.driver_name	= "rfc3686-ctr-aes-ccp",
+		.blocksize	= 1,
+		.ivsize		= CTR_RFC3686_IV_SIZE,
+		.alg_defaults	= &ccp_aes_rfc3686_defaults,
+	},
+};
+
+/*
+ * Allocate and register the AES algorithm described by @def.  The entry
+ * starts as a copy of def->alg_defaults; names, block size and IV size are
+ * then taken from @def.  On success the entry is linked onto @head and 0 is
+ * returned; on failure the allocation is released and a negative errno is
+ * returned.
+ */
+static int ccp_register_aes_alg(struct list_head *head,
+				const struct ccp_aes_def *def)
+{
+	struct ccp_crypto_ablkcipher_alg *new_alg;
+	struct crypto_alg *alg;
+	int err;
+
+	new_alg = kzalloc(sizeof(*new_alg), GFP_KERNEL);
+	if (!new_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&new_alg->entry);
+	new_alg->mode = def->mode;
+
+	/* Template first, per-algorithm overrides second */
+	alg = &new_alg->alg;
+	*alg = *def->alg_defaults;
+	alg->cra_blocksize = def->blocksize;
+	alg->cra_ablkcipher.ivsize = def->ivsize;
+	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->driver_name);
+
+	err = crypto_register_alg(alg);
+	if (!err) {
+		list_add(&new_alg->entry, head);
+		return 0;
+	}
+
+	pr_err("%s ablkcipher algorithm registration error (%d)\n",
+	       alg->cra_name, err);
+	kfree(new_alg);
+
+	return err;
+}
+
+/*
+ * Register every aes_algs entry whose version does not exceed the value
+ * reported by ccp_version(), linking each onto @head.  Stops at the first
+ * failure and returns its error code; returns 0 otherwise.
+ */
+int ccp_register_aes_algs(struct list_head *head)
+{
+	unsigned int hw_version = ccp_version();
+	unsigned int idx;
+	int err;
+
+	for (idx = 0; idx < ARRAY_SIZE(aes_algs); idx++) {
+		if (aes_algs[idx].version <= hw_version) {
+			err = ccp_register_aes_alg(head, &aes_algs[idx]);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c
new file mode 100644
index 000000000..ae87b741f
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-des3.c
@@ -0,0 +1,254 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) DES3 crypto API support
+ *
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <ghook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/des.h>
+
+#include "ccp-crypto.h"
+
+/*
+ * Completion hook for the DES3 ablkcipher modes.  A non-zero @ret is handed
+ * straight back.  On success, for every mode except ECB, the IV kept in the
+ * request context is copied back into req->info.
+ */
+static int ccp_des3_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct ablkcipher_request *req = ablkcipher_request_cast(async_req);
+	struct ccp_des3_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+
+	if (!ret && ctx->u.des3.mode != CCP_DES3_MODE_ECB)
+		memcpy(req->info, rctx->iv, DES3_EDE_BLOCK_SIZE);
+
+	return ret;
+}
+
+/*
+ * setkey hook for the DES3 ablkcipher modes.
+ *
+ * The key is viewed as three 64-bit subkeys (six 32-bit words).  When the
+ * tfm has CRYPTO_TFM_REQ_WEAK_KEY set and k1 == k2 or k2 == k3, the key is
+ * rejected with CRYPTO_TFM_RES_WEAK_KEY and -EINVAL.  Otherwise the key is
+ * copied into the tfm context behind a one-entry scatterlist and 0 is
+ * returned.
+ */
+static int ccp_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
+		unsigned int key_len)
+{
+	struct crypto_tfm *base = crypto_ablkcipher_tfm(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx(base);
+	struct ccp_crypto_ablkcipher_alg *alg = ccp_crypto_ablkcipher_alg(base);
+	u32 *flags = &tfm->base.crt_flags;
+	const u32 *K = (const u32 *)key;
+
+	/* From des_generic.c:
+	 *
+	 * RFC2451:
+	 *   If the first two or last two independent 64-bit keys are
+	 *   equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
+	 *   same as DES.  Implementers MUST reject keys that exhibit this
+	 *   property.
+	 */
+	if (unlikely((K[0] == K[2] && K[1] == K[3]) ||
+		     (K[2] == K[4] && K[3] == K[5])) &&
+	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		return -EINVAL;
+	}
+
+	/* It's not clear that there is any support for a keysize of 112.
+	 * If needed, the caller should make K1 == K3
+	 */
+	ctx->u.des3.type = CCP_DES3_TYPE_168;
+	ctx->u.des3.key_len = key_len;
+	ctx->u.des3.mode = alg->mode;
+
+	/* Private copy of the key, wrapped in a scatterlist */
+	memcpy(ctx->u.des3.key, key, key_len);
+	sg_init_one(&ctx->u.des3.key_sg, ctx->u.des3.key, key_len);
+
+	return 0;
+}
+
+/*
+ * Common encrypt/decrypt path for the DES3 ablkcipher modes: validate the
+ * request, stage the IV (every mode but ECB), build the CCP command inside
+ * the request context and queue it.
+ *
+ * Returns -EINVAL when no key has been set, when an ECB or CBC request is
+ * not a multiple of DES3_EDE_BLOCK_SIZE, or when a required IV pointer is
+ * NULL; otherwise the result of ccp_crypto_enqueue_request().
+ */
+static int ccp_des3_crypt(struct ablkcipher_request *req, bool encrypt)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
+	struct ccp_des3_req_ctx *rctx = ablkcipher_request_ctx(req);
+	struct ccp_cmd *cmd = &rctx->cmd;
+	bool needs_iv = ctx->u.des3.mode != CCP_DES3_MODE_ECB;
+	bool whole_blocks_only;
+
+	if (!ctx->u.des3.key_len)
+		return -EINVAL;
+
+	whole_blocks_only = ctx->u.des3.mode == CCP_DES3_MODE_ECB ||
+			    ctx->u.des3.mode == CCP_DES3_MODE_CBC;
+	if (whole_blocks_only && (req->nbytes & (DES3_EDE_BLOCK_SIZE - 1)))
+		return -EINVAL;
+
+	if (needs_iv) {
+		if (!req->info)
+			return -EINVAL;
+
+		/* Work on a private copy of the caller's IV */
+		memcpy(rctx->iv, req->info, DES3_EDE_BLOCK_SIZE);
+		sg_init_one(&rctx->iv_sg, rctx->iv, DES3_EDE_BLOCK_SIZE);
+	}
+
+	/* Start from a clean command, then describe the operation */
+	memset(cmd, 0, sizeof(*cmd));
+	INIT_LIST_HEAD(&cmd->entry);
+	cmd->engine = CCP_ENGINE_DES3;
+	cmd->u.des3.type = ctx->u.des3.type;
+	cmd->u.des3.mode = ctx->u.des3.mode;
+	if (encrypt)
+		cmd->u.des3.action = CCP_DES3_ACTION_ENCRYPT;
+	else
+		cmd->u.des3.action = CCP_DES3_ACTION_DECRYPT;
+	cmd->u.des3.key = &ctx->u.des3.key_sg;
+	cmd->u.des3.key_len = ctx->u.des3.key_len;
+	cmd->u.des3.iv = needs_iv ? &rctx->iv_sg : NULL;
+	cmd->u.des3.iv_len = needs_iv ? DES3_EDE_BLOCK_SIZE : 0;
+	cmd->u.des3.src = req->src;
+	cmd->u.des3.src_len = req->nbytes;
+	cmd->u.des3.dst = req->dst;
+
+	return ccp_crypto_enqueue_request(&req->base, cmd);
+}
+
+/* .encrypt hook: run the common DES3 path as an encryption. */
+static int ccp_des3_encrypt(struct ablkcipher_request *req)
+{
+	const bool encrypt = true;
+
+	return ccp_des3_crypt(req, encrypt);
+}
+
+/* .decrypt hook: run the common DES3 path as a decryption. */
+static int ccp_des3_decrypt(struct ablkcipher_request *req)
+{
+	const bool encrypt = false;
+
+	return ccp_des3_crypt(req, encrypt);
+}
+
+/*
+ * cra_init hook for the DES3 ablkcipher modes: size the per-request context,
+ * mark the tfm as having no key yet (key_len == 0 makes ccp_des3_crypt()
+ * reject requests) and install the completion hook.  Always returns 0.
+ */
+static int ccp_des3_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *des3_ctx = crypto_tfm_ctx(tfm);
+
+	tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_des3_req_ctx);
+
+	des3_ctx->u.des3.key_len = 0;
+	des3_ctx->complete = ccp_des3_complete;
+
+	return 0;
+}
+
+/* cra_exit hook: empty, ccp_des3_cra_init() only sets context fields. */
+static void ccp_des3_cra_exit(struct crypto_tfm *tfm)
+{
+}
+
+/*
+ * Template crypto_alg for the DES3 modes.  ccp_register_des3_alg() copies it
+ * and then overrides the names, the block size and the IV size from the
+ * matching des3_algs entry.  Only DES3_EDE_KEY_SIZE keys are accepted.
+ */
+static struct crypto_alg ccp_des3_defaults = {
+	.cra_flags	= CRYPTO_ALG_TYPE_ABLKCIPHER |
+		CRYPTO_ALG_ASYNC |
+		CRYPTO_ALG_KERN_DRIVER_ONLY |
+		CRYPTO_ALG_NEED_FALLBACK,
+	.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
+	.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.cra_priority	= CCP_CRA_PRIORITY,
+	.cra_type	= &crypto_ablkcipher_type,
+	.cra_init	= ccp_des3_cra_init,
+	.cra_exit	= ccp_des3_cra_exit,
+	.cra_module	= THIS_MODULE,
+	.cra_ablkcipher	= {
+		.setkey		= ccp_des3_setkey,
+		.encrypt	= ccp_des3_encrypt,
+		.decrypt	= ccp_des3_decrypt,
+		.min_keysize	= DES3_EDE_KEY_SIZE,
+		.max_keysize	= DES3_EDE_KEY_SIZE,
+	},
+};
+
+/*
+ * Description of one DES3 algorithm to register.
+ *
+ * @mode:         CCP DES3 mode stored in the registered algorithm entry
+ * @version:      CCP version compared against ccp_version(); entries with a
+ *                larger value are skipped at registration time
+ * @name:         value for cra_name
+ * @driver_name:  value for cra_driver_name
+ * @blocksize:    value for cra_blocksize
+ * @ivsize:       value for cra_ablkcipher.ivsize
+ * @alg_defaults: template crypto_alg copied before the overrides above
+ */
+struct ccp_des3_def {
+	enum ccp_des3_mode mode;
+	unsigned int version;
+	const char *name;
+	const char *driver_name;
+	unsigned int blocksize;
+	unsigned int ivsize;
+	struct crypto_alg *alg_defaults;
+};
+
+/*
+ * DES3 algorithms offered by this file.  The table is only ever read (the
+ * registration helper takes a const pointer to each entry), so it is
+ * const-qualified.
+ */
+static const struct ccp_des3_def des3_algs[] = {
+	{
+		.mode		= CCP_DES3_MODE_ECB,
+		.version	= CCP_VERSION(5, 0),
+		.name		= "ecb(des3_ede)",
+		.driver_name	= "ecb-des3-ccp",
+		.blocksize	= DES3_EDE_BLOCK_SIZE,
+		.ivsize		= 0,
+		.alg_defaults	= &ccp_des3_defaults,
+	},
+	{
+		.mode		= CCP_DES3_MODE_CBC,
+		.version	= CCP_VERSION(5, 0),
+		.name		= "cbc(des3_ede)",
+		.driver_name	= "cbc-des3-ccp",
+		.blocksize	= DES3_EDE_BLOCK_SIZE,
+		.ivsize		= DES3_EDE_BLOCK_SIZE,
+		.alg_defaults	= &ccp_des3_defaults,
+	},
+};
+
+/*
+ * Allocate and register the DES3 algorithm described by @def.  The entry
+ * starts as a copy of def->alg_defaults; names, block size and IV size are
+ * then taken from @def.  On success the entry is linked onto @head and 0 is
+ * returned; on failure the allocation is released and a negative errno is
+ * returned.
+ */
+static int ccp_register_des3_alg(struct list_head *head,
+				 const struct ccp_des3_def *def)
+{
+	struct ccp_crypto_ablkcipher_alg *new_alg;
+	struct crypto_alg *alg;
+	int err;
+
+	new_alg = kzalloc(sizeof(*new_alg), GFP_KERNEL);
+	if (!new_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&new_alg->entry);
+	new_alg->mode = def->mode;
+
+	/* Template first, per-algorithm overrides second */
+	alg = &new_alg->alg;
+	*alg = *def->alg_defaults;
+	alg->cra_blocksize = def->blocksize;
+	alg->cra_ablkcipher.ivsize = def->ivsize;
+	snprintf(alg->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+			def->driver_name);
+
+	err = crypto_register_alg(alg);
+	if (!err) {
+		list_add(&new_alg->entry, head);
+		return 0;
+	}
+
+	pr_err("%s ablkcipher algorithm registration error (%d)\n",
+			alg->cra_name, err);
+	kfree(new_alg);
+
+	return err;
+}
+
+/*
+ * Register every des3_algs entry whose version does not exceed the value
+ * reported by ccp_version(), linking each onto @head.  Stops at the first
+ * failure and returns its error code; returns 0 otherwise.
+ */
+int ccp_register_des3_algs(struct list_head *head)
+{
+	unsigned int hw_version = ccp_version();
+	unsigned int idx;
+	int err;
+
+	for (idx = 0; idx < ARRAY_SIZE(des3_algs); idx++) {
+		if (des3_algs[idx].version <= hw_version) {
+			err = ccp_register_des3_alg(head, &des3_algs[idx]);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
new file mode 100644
index 000000000..b95d19974
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -0,0 +1,432 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) crypto API support
+ *
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/ccp.h>
+#include <linux/scatterlist.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/akcipher.h>
+
+#include "ccp-crypto.h"
+
+/* Module identification */
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0.0");
+MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support");
+
+/* Per-family switches: any non-zero value makes ccp_register_algs() skip
+ * registering that family of algorithms.
+ */
+static unsigned int aes_disable;
+module_param(aes_disable, uint, 0444);
+MODULE_PARM_DESC(aes_disable, "Disable use of AES - any non-zero value");
+
+static unsigned int sha_disable;
+module_param(sha_disable, uint, 0444);
+MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value");
+
+static unsigned int des3_disable;
+module_param(des3_disable, uint, 0444);
+MODULE_PARM_DESC(des3_disable, "Disable use of 3DES - any non-zero value");
+
+static unsigned int rsa_disable;
+module_param(rsa_disable, uint, 0444);
+MODULE_PARM_DESC(rsa_disable, "Disable use of RSA - any non-zero value");
+
+/* List heads for the supported algorithms.  Entries are added by the
+ * ccp_register_*() helpers called from ccp_register_algs() and are
+ * unregistered and freed by ccp_unregister_algs().
+ */
+static LIST_HEAD(hash_algs);
+static LIST_HEAD(cipher_algs);
+static LIST_HEAD(aead_algs);
+static LIST_HEAD(akcipher_algs);
+
+/* For any tfm, requests for that tfm must be returned in the order
+ * received.  With multiple queues available, the CCP can process more
+ * than one cmd at a time.  Therefore we must maintain a cmd list to ensure
+ * the proper ordering of requests on a given tfm.
+ *
+ * @cmds:      every cmd currently tracked; new cmds are appended at the tail
+ * @backlog:   entry of the next backlogged cmd, or &cmds when there is none
+ * @cmd_count: number of entries on @cmds
+ */
+struct ccp_crypto_queue {
+	struct list_head cmds;
+	struct list_head *backlog;
+	unsigned int cmd_count;
+};
+
+/* Once this many cmds are tracked, a new cmd is only accepted if it may be
+ * backlogged; it is then reported to the caller as -EBUSY.
+ */
+#define CCP_CRYPTO_MAX_QLEN	100
+
+/* The single request queue and the lock protecting it; both are set up in
+ * ccp_crypto_init().
+ */
+static struct ccp_crypto_queue req_queue;
+static spinlock_t req_queue_lock;
+
+/* Per-request tracking structure, allocated in ccp_crypto_enqueue_request()
+ * and linked on req_queue.cmds while the request is outstanding.
+ */
+struct ccp_crypto_cmd {
+	/* Link on req_queue.cmds */
+	struct list_head entry;
+
+	/* The CCP command to submit for this request */
+	struct ccp_cmd *cmd;
+
+	/* Save the crypto_tfm and crypto_async_request addresses
+	 * separately to avoid any reference to a possibly invalid
+	 * crypto_async_request structure after invoking the request
+	 * callback
+	 */
+	struct crypto_async_request *req;
+	struct crypto_tfm *tfm;
+
+	/* Used for held command processing to determine state */
+	int ret;
+};
+
+/* Bundles a work item, a completion, a tracked cmd and an error code.
+ * NOTE(review): nothing in the visible part of this file references this
+ * structure - confirm whether it is still needed.
+ */
+struct ccp_crypto_cpu {
+	struct work_struct work;
+	struct completion completion;
+	struct ccp_crypto_cmd *crypto_cmd;
+	int err;
+};
+
+/* A cmd counts as accepted when @err is 0, -EINPROGRESS or -EBUSY; every
+ * other value is a failure.
+ */
+static inline bool ccp_crypto_success(int err)
+{
+	return !err || err == -EINPROGRESS || err == -EBUSY;
+}
+
+/* Remove @crypto_cmd from req_queue, all under req_queue_lock.
+ *
+ * Returns the first cmd queued after @crypto_cmd that uses the same tfm, or
+ * NULL when there is none.
+ *
+ * *@backlog is set to the cmd the backlog pointer refers to (stepping past
+ * @crypto_cmd itself if necessary) and the backlog pointer is advanced; it
+ * is set to NULL when the backlog pointer is parked on the list head.
+ */
+static struct ccp_crypto_cmd *ccp_crypto_cmd_complete(
+	struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog)
+{
+	struct ccp_crypto_cmd *held = NULL, *tmp;
+	unsigned long flags;
+
+	*backlog = NULL;
+
+	spin_lock_irqsave(&req_queue_lock, flags);
+
+	/* Held cmds will be after the current cmd in the queue so start
+	 * searching for a cmd with a matching tfm for submission.
+	 */
+	tmp = crypto_cmd;
+	list_for_each_entry_continue(tmp, &req_queue.cmds, entry) {
+		if (crypto_cmd->tfm != tmp->tfm)
+			continue;
+		held = tmp;
+		break;
+	}
+
+	/* Process the backlog:
+	 *   Because cmds can be executed from any point in the cmd list
+	 *   special precautions have to be taken when handling the backlog.
+	 */
+	if (req_queue.backlog != &req_queue.cmds) {
+		/* Skip over this cmd if it is the next backlog cmd */
+		if (req_queue.backlog == &crypto_cmd->entry)
+			req_queue.backlog = crypto_cmd->entry.next;
+
+		*backlog = container_of(req_queue.backlog,
+					struct ccp_crypto_cmd, entry);
+		req_queue.backlog = req_queue.backlog->next;
+
+		/* Skip over this cmd if it is now the next backlog cmd */
+		if (req_queue.backlog == &crypto_cmd->entry)
+			req_queue.backlog = crypto_cmd->entry.next;
+	}
+
+	/* Remove the cmd entry from the list of cmds */
+	req_queue.cmd_count--;
+	list_del(&crypto_cmd->entry);
+
+	spin_unlock_irqrestore(&req_queue_lock, flags);
+
+	return held;
+}
+
+/* Callback installed on every ccp_cmd by ccp_crypto_enqueue_request().
+ *
+ * @data: the struct ccp_crypto_cmd tracking the request
+ * @err:  status reported for the cmd
+ *
+ * -EINPROGRESS is only a notification: it is forwarded to the request's
+ * completion callback if the cmd had been reported as -EBUSY, and nothing
+ * else happens.  For any other status the cmd is removed from the queue,
+ * the request is completed, and the next held cmd for the same tfm (if any)
+ * is submitted.  Held cmds that fail to submit are completed with their
+ * error in turn until one is accepted or none are left.
+ */
+static void ccp_crypto_complete(void *data, int err)
+{
+	struct ccp_crypto_cmd *crypto_cmd = data;
+	struct ccp_crypto_cmd *held, *next, *backlog;
+	struct crypto_async_request *req = crypto_cmd->req;
+	struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm);
+	int ret;
+
+	if (err == -EINPROGRESS) {
+		/* Only propagate the -EINPROGRESS if necessary */
+		if (crypto_cmd->ret == -EBUSY) {
+			crypto_cmd->ret = -EINPROGRESS;
+			req->complete(req, -EINPROGRESS);
+		}
+
+		return;
+	}
+
+	/* Operation has completed - update the queue before invoking
+	 * the completion callbacks and retrieve the next cmd (cmd with
+	 * a matching tfm) that can be submitted to the CCP.
+	 */
+	held = ccp_crypto_cmd_complete(crypto_cmd, &backlog);
+	if (backlog) {
+		/* Tell the backlogged request it is no longer backlogged */
+		backlog->ret = -EINPROGRESS;
+		backlog->req->complete(backlog->req, -EINPROGRESS);
+	}
+
+	/* Transition the state from -EBUSY to -EINPROGRESS first */
+	if (crypto_cmd->ret == -EBUSY)
+		req->complete(req, -EINPROGRESS);
+
+	/* Completion callbacks */
+	ret = err;
+	if (ctx->complete)
+		ret = ctx->complete(req, ret);
+	req->complete(req, ret);
+
+	/* Submit the next cmd */
+	while (held) {
+		/* Since we have already queued the cmd, we must indicate that
+		 * we can backlog so as not to "lose" this request.
+		 */
+		held->cmd->flags |= CCP_CMD_MAY_BACKLOG;
+		ret = ccp_enqueue_cmd(held->cmd);
+		if (ccp_crypto_success(ret))
+			break;
+
+		/* Error occurred, report it and get the next entry */
+		ctx = crypto_tfm_ctx(held->req->tfm);
+		if (ctx->complete)
+			ret = ctx->complete(held->req, ret);
+		held->req->complete(held->req, ret);
+
+		next = ccp_crypto_cmd_complete(held, &backlog);
+		if (backlog) {
+			backlog->ret = -EINPROGRESS;
+			backlog->req->complete(backlog->req, -EINPROGRESS);
+		}
+
+		kfree(held);
+		held = next;
+	}
+
+	kfree(crypto_cmd);
+}
+
+/* Add @crypto_cmd to req_queue and, when no earlier cmd for the same tfm is
+ * still queued, submit it to the CCP right away.
+ *
+ * Returns:
+ *   -ENOSPC      the queue is at CCP_CRYPTO_MAX_QLEN and the cmd may not be
+ *                backlogged
+ *   -EBUSY       the cmd was accepted while the queue was at
+ *                CCP_CRYPTO_MAX_QLEN (i.e. backlogged)
+ *   -EINPROGRESS the cmd is held behind an earlier cmd for the same tfm
+ *   otherwise    the result of ccp_enqueue_cmd()
+ *
+ * When the cmd is not queued (-ENOSPC or a ccp_enqueue_cmd() failure),
+ * @crypto_cmd is freed here.
+ */
+static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd)
+{
+	struct ccp_crypto_cmd *active = NULL, *tmp;
+	unsigned long flags;
+	bool free_cmd = true;
+	int ret;
+
+	spin_lock_irqsave(&req_queue_lock, flags);
+
+	/* Check if the cmd can/should be queued */
+	if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) {
+		if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) {
+			ret = -ENOSPC;
+			goto e_lock;
+		}
+	}
+
+	/* Look for an entry with the same tfm.  If there is a cmd
+	 * with the same tfm in the list then the current cmd cannot
+	 * be submitted to the CCP yet.
+	 */
+	list_for_each_entry(tmp, &req_queue.cmds, entry) {
+		if (crypto_cmd->tfm != tmp->tfm)
+			continue;
+		active = tmp;
+		break;
+	}
+
+	ret = -EINPROGRESS;
+	if (!active) {
+		ret = ccp_enqueue_cmd(crypto_cmd->cmd);
+		if (!ccp_crypto_success(ret))
+			goto e_lock;	/* Error, don't queue it */
+	}
+
+	/* Over the limit: report -EBUSY and make this cmd the start of the
+	 * backlog if no backlog exists yet.
+	 */
+	if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) {
+		ret = -EBUSY;
+		if (req_queue.backlog == &req_queue.cmds)
+			req_queue.backlog = &crypto_cmd->entry;
+	}
+	/* Remember what the caller was told; the completion path uses it */
+	crypto_cmd->ret = ret;
+
+	req_queue.cmd_count++;
+	list_add_tail(&crypto_cmd->entry, &req_queue.cmds);
+
+	free_cmd = false;
+
+e_lock:
+	spin_unlock_irqrestore(&req_queue_lock, flags);
+
+	if (free_cmd)
+		kfree(crypto_cmd);
+
+	return ret;
+}
+
+/**
+ * ccp_crypto_enqueue_request - queue a crypto async request for processing
+ *				by the CCP
+ *
+ * @req: crypto_async_request struct to be processed
+ * @cmd: ccp_cmd struct to be sent to the CCP
+ *
+ * Return: -ENOMEM if the tracking structure cannot be allocated, otherwise
+ * the result of ccp_crypto_enqueue_cmd().
+ */
+int ccp_crypto_enqueue_request(struct crypto_async_request *req,
+			       struct ccp_cmd *cmd)
+{
+	struct ccp_crypto_cmd *tracker;
+	gfp_t gfp = GFP_ATOMIC;
+
+	/* GFP_KERNEL only when the request says sleeping is allowed */
+	if (req->flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+		gfp = GFP_KERNEL;
+
+	tracker = kzalloc(sizeof(*tracker), gfp);
+	if (!tracker)
+		return -ENOMEM;
+
+	/* Mirror the request's backlog permission onto the CCP cmd */
+	cmd->flags &= ~CCP_CMD_MAY_BACKLOG;
+	if (req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG)
+		cmd->flags |= CCP_CMD_MAY_BACKLOG;
+
+	cmd->callback = ccp_crypto_complete;
+	cmd->data = tracker;
+
+	/* The tfm pointer must be saved and not referenced from the
+	 * crypto_async_request (req) pointer because it is used after
+	 * completion callback for the request and the req pointer
+	 * might not be valid anymore.
+	 */
+	tracker->tfm = req->tfm;
+	tracker->req = req;
+	tracker->cmd = cmd;
+
+	return ccp_crypto_enqueue_cmd(tracker);
+}
+
+/*
+ * Copy the entries of the scatterlist @sg_add into @table, starting at the
+ * first table entry that has no page assigned.
+ *
+ * Returns the last table entry that was written.  Returns NULL (after a
+ * WARN_ON) when the table has no free entry or runs out of entries before
+ * @sg_add is exhausted.  Also returns NULL, without a warning, when @sg_add
+ * itself is NULL.
+ */
+struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
+					    struct scatterlist *sg_add)
+{
+	struct scatterlist *slot = table->sgl;
+	struct scatterlist *last_used = NULL;
+
+	/* Find the first unused slot */
+	while (slot && sg_page(slot))
+		slot = sg_next(slot);
+	if (WARN_ON(!slot))
+		return NULL;
+
+	/* Fill slots until either list runs out */
+	while (slot && sg_add) {
+		sg_set_page(slot, sg_page(sg_add), sg_add->length,
+			    sg_add->offset);
+		last_used = slot;
+
+		slot = sg_next(slot);
+		sg_add = sg_next(sg_add);
+	}
+	if (WARN_ON(sg_add))
+		return NULL;
+
+	return last_used;
+}
+
+/*
+ * Register every algorithm family that has not been disabled through its
+ * module parameter.  Returns 0 on success, or the first error reported by a
+ * registration helper; algorithms registered before the failure are left in
+ * place for the caller to clean up.
+ */
+static int ccp_register_algs(void)
+{
+	int err;
+
+	if (!aes_disable) {
+		err = ccp_register_aes_algs(&cipher_algs);
+		if (!err)
+			err = ccp_register_aes_cmac_algs(&hash_algs);
+		if (!err)
+			err = ccp_register_aes_xts_algs(&cipher_algs);
+		if (!err)
+			err = ccp_register_aes_aeads(&aead_algs);
+		if (err)
+			return err;
+	}
+
+	if (!des3_disable) {
+		err = ccp_register_des3_algs(&cipher_algs);
+		if (err)
+			return err;
+	}
+
+	if (!sha_disable) {
+		err = ccp_register_sha_algs(&hash_algs);
+		if (err)
+			return err;
+	}
+
+	if (!rsa_disable)
+		return ccp_register_rsa_algs(&akcipher_algs);
+
+	return 0;
+}
+
+/*
+ * Drain the four algorithm lists: each entry is unregistered from the
+ * crypto API, unlinked from its list and freed.
+ */
+static void ccp_unregister_algs(void)
+{
+	struct ccp_crypto_ahash_alg *hash;
+	struct ccp_crypto_ablkcipher_alg *cipher;
+	struct ccp_crypto_aead *aead;
+	struct ccp_crypto_akcipher_alg *akcipher;
+
+	while (!list_empty(&hash_algs)) {
+		hash = list_first_entry(&hash_algs,
+					struct ccp_crypto_ahash_alg, entry);
+		crypto_unregister_ahash(&hash->alg);
+		list_del(&hash->entry);
+		kfree(hash);
+	}
+
+	while (!list_empty(&cipher_algs)) {
+		cipher = list_first_entry(&cipher_algs,
+					  struct ccp_crypto_ablkcipher_alg,
+					  entry);
+		crypto_unregister_alg(&cipher->alg);
+		list_del(&cipher->entry);
+		kfree(cipher);
+	}
+
+	while (!list_empty(&aead_algs)) {
+		aead = list_first_entry(&aead_algs,
+					struct ccp_crypto_aead, entry);
+		crypto_unregister_aead(&aead->alg);
+		list_del(&aead->entry);
+		kfree(aead);
+	}
+
+	while (!list_empty(&akcipher_algs)) {
+		akcipher = list_first_entry(&akcipher_algs,
+					    struct ccp_crypto_akcipher_alg,
+					    entry);
+		crypto_unregister_akcipher(&akcipher->alg);
+		list_del(&akcipher->entry);
+		kfree(akcipher);
+	}
+}
+
+/*
+ * Module init: bail out with the ccp_present() result if it is non-zero,
+ * otherwise set up the request queue and register the algorithms.  If
+ * registration fails, whatever was registered is rolled back and the error
+ * is returned.
+ */
+static int ccp_crypto_init(void)
+{
+	int err = ccp_present();
+
+	if (err)
+		return err;
+
+	/* Empty queue: no cmds, backlog pointer parked on the list head */
+	req_queue.cmd_count = 0;
+	INIT_LIST_HEAD(&req_queue.cmds);
+	req_queue.backlog = &req_queue.cmds;
+	spin_lock_init(&req_queue_lock);
+
+	err = ccp_register_algs();
+	if (!err)
+		return 0;
+
+	ccp_unregister_algs();
+
+	return err;
+}
+
+/* Module exit: unregister and free every algorithm on the four lists. */
+static void ccp_crypto_exit(void)
+{
+	ccp_unregister_algs();
+}
+
+/* Module entry and exit points */
+module_init(ccp_crypto_init);
+module_exit(ccp_crypto_exit);
diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c
new file mode 100644
index 000000000..05850dfd7
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-rsa.c
@@ -0,0 +1,298 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) RSA crypto API support
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/rsa.h>
+#include <crypto/internal/akcipher.h>
+#include <crypto/akcipher.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+static inline struct akcipher_request *akcipher_request_cast(
+	struct crypto_async_request *req)
+{
+	return container_of(req, struct akcipher_request, base); /* outer req */
+}
+
+/* Duplicate a key component into a new buffer, dropping leading zero bytes.
+ * Returns 0, -EINVAL if no non-zero byte is present, or -ENOMEM.
+ */
+static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen,
+					    const u8 *buf, size_t sz)
+{
+	size_t nskip = 0;	/* size_t: no signed/unsigned compare against sz */
+
+	while (nskip < sz && !buf[nskip])
+		nskip++;
+	if (nskip == sz)	/* a zero-length kpbuf cannot back a scatterlist */
+		return -EINVAL;
+	*kplen = sz - nskip;
+	*kpbuf = kmemdup(buf + nskip, *kplen, GFP_KERNEL);
+	return *kpbuf ? 0 : -ENOMEM;
+}
+
+/* Completion hook: a successful operation yields one key-sized result */
+static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct akcipher_request *areq = akcipher_request_cast(async_req);
+	struct ccp_rsa_req_ctx *req_ctx = akcipher_request_ctx(areq);
+
+	/* key_size is in bits, dst_len in bytes; untouched on failure */
+	if (!ret)
+		areq->dst_len = req_ctx->cmd.u.rsa.key_size >> 3;
+
+	return ret;
+}
+
+/* .max_size hook: byte length of the modulus stored by setkey */
+static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
+{
+	const struct ccp_ctx *key_ctx = akcipher_tfm_ctx(tfm);
+	return key_ctx->u.rsa.n_len;
+}
+
+/* Build an RSA engine command from the tfm key and @req, then queue it.
+ * @encrypt selects the public exponent (e); otherwise the private one (d).
+ * Returns whatever ccp_crypto_enqueue_request() returns.
+ */
+static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt)
+{
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(crypto_akcipher_reqtfm(req));
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+	struct ccp_cmd *cmd = &rctx->cmd;
+
+	/* The command lives in the request context; start from a clean one */
+	memset(cmd, 0, sizeof(*cmd));
+	INIT_LIST_HEAD(&cmd->entry);
+	cmd->engine = CCP_ENGINE_RSA;
+
+	/* Key material comes from the scatterlists prepared at setkey time */
+	cmd->u.rsa.key_size = ctx->u.rsa.key_len; /* in bits */
+	cmd->u.rsa.exp = encrypt ? &ctx->u.rsa.e_sg : &ctx->u.rsa.d_sg;
+	cmd->u.rsa.exp_len = encrypt ? ctx->u.rsa.e_len : ctx->u.rsa.d_len;
+	cmd->u.rsa.mod = &ctx->u.rsa.n_sg;
+	cmd->u.rsa.mod_len = ctx->u.rsa.n_len;
+
+	/* Input and output buffers are passed through untouched */
+	cmd->u.rsa.src = req->src;
+	cmd->u.rsa.src_len = req->src_len;
+	cmd->u.rsa.dst = req->dst;
+
+	return ccp_crypto_enqueue_request(&req->base, cmd);
+}
+
+static int ccp_rsa_encrypt(struct akcipher_request *req)
+{
+	return ccp_rsa_crypt(req, true);	/* true: use exponent e */
+}
+
+static int ccp_rsa_decrypt(struct akcipher_request *req)
+{
+	return ccp_rsa_crypt(req, false);	/* false: use exponent d */
+}
+
+/* Validate an RSA modulus size given in bits.
+ * Returns 0 for 8..4096 inclusive and -EINVAL for anything else.
+ */
+static int ccp_check_key_length(unsigned int len)
+{
+	return (len >= 8 && len <= 4096) ? 0 : -EINVAL;
+}
+
+/* Release every stored key part with kzfree() and reset pointers/lengths */
+static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx)
+{
+	struct ccp_rsa_ctx *rsa = &ctx->u.rsa;
+
+	kzfree(rsa->e_buf);
+	kzfree(rsa->n_buf);
+	kzfree(rsa->d_buf);
+	rsa->e_buf = NULL;
+	rsa->n_buf = NULL;
+	rsa->d_buf = NULL;
+	rsa->e_len = rsa->n_len = rsa->d_len = 0;
+}
+
+/* Replace the key held in the tfm context.  The previous key is dropped
+ * first, so the context is left without a key if parsing or copying fails.
+ */
+static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
+			  unsigned int keylen, bool private)
+{
+	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct ccp_rsa_ctx *rsa = &ctx->u.rsa;
+	struct rsa_key parsed;
+	int err;
+
+	ccp_rsa_free_key_bufs(ctx);
+	memset(&parsed, 0, sizeof(parsed));
+
+	/* Code borrowed from crypto/rsa.c */
+	err = private ? rsa_parse_priv_key(&parsed, key, keylen) :
+			rsa_parse_pub_key(&parsed, key, keylen);
+	if (err)
+		return err;
+
+	/* Modulus; its stripped length defines the key size */
+	err = ccp_copy_and_save_keypart(&rsa->n_buf, &rsa->n_len,
+					parsed.n, parsed.n_sz);
+	if (err)
+		goto discard;
+	sg_init_one(&rsa->n_sg, rsa->n_buf, rsa->n_len);
+
+	rsa->key_len = rsa->n_len << 3; /* convert to bits */
+	if (ccp_check_key_length(rsa->key_len)) {
+		err = -EINVAL;
+		goto discard;
+	}
+
+	err = ccp_copy_and_save_keypart(&rsa->e_buf, &rsa->e_len,
+					parsed.e, parsed.e_sz);
+	if (err)
+		goto discard;
+	sg_init_one(&rsa->e_sg, rsa->e_buf, rsa->e_len);
+
+	if (!private)
+		return 0;
+
+	err = ccp_copy_and_save_keypart(&rsa->d_buf, &rsa->d_len,
+					parsed.d, parsed.d_sz);
+	if (err)
+		goto discard;
+	sg_init_one(&rsa->d_sg, rsa->d_buf, rsa->d_len);
+
+	return 0;
+
+discard:
+	ccp_rsa_free_key_bufs(ctx);
+	return err;
+}
+
+static int ccp_rsa_setprivkey(struct crypto_akcipher *tfm, const void *key,
+			      unsigned int keylen)
+{
+	return ccp_rsa_setkey(tfm, key, keylen, true);	/* also stores d */
+}
+
+static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key,
+			     unsigned int keylen)
+{
+	return ccp_rsa_setkey(tfm, key, keylen, false);	/* n and e only */
+}
+
+/* .init hook: install the completion callback and size the request context */
+static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm)
+{
+	struct ccp_ctx *tfm_ctx = akcipher_tfm_ctx(tfm);
+
+	tfm_ctx->complete = ccp_rsa_complete;
+	akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx));
+	return 0;
+}
+
+/* .exit hook: drop the key material held by the transform context */
+static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm)
+{
+	/* akcipher accessor, as used by every other callback in this file */
+	ccp_rsa_free_key_bufs(akcipher_tfm_ctx(tfm));
+}
+
+static struct akcipher_alg ccp_rsa_defaults = {	/* template, copied per alg */
+	.encrypt = ccp_rsa_encrypt,
+	.decrypt = ccp_rsa_decrypt,
+	.sign = ccp_rsa_decrypt,	/* same operation as decrypt */
+	.verify = ccp_rsa_encrypt,	/* same operation as encrypt */
+	.set_pub_key = ccp_rsa_setpubkey,
+	.set_priv_key = ccp_rsa_setprivkey,
+	.max_size = ccp_rsa_maxsize,
+	.init = ccp_rsa_init_tfm,
+	.exit = ccp_rsa_exit_tfm,
+	.base = {
+		.cra_name = "rsa",
+		.cra_driver_name = "rsa-ccp",
+		.cra_priority = CCP_CRA_PRIORITY,
+		.cra_module = THIS_MODULE,
+		.cra_ctxsize = 2 * sizeof(struct ccp_ctx), /* NOTE(review): why 2x? */
+	},
+};
+
+struct ccp_rsa_def {
+	unsigned int version;	/* entry is skipped if above ccp_version() */
+	const char *name;	/* becomes cra_name */
+	const char *driver_name;	/* becomes cra_driver_name */
+	unsigned int reqsize;	/* not read by the registration code below */
+	struct akcipher_alg *alg_defaults;	/* template copied on register */
+};
+
+static struct ccp_rsa_def rsa_algs[] = {	/* walked by ccp_register_rsa_algs() */
+	{
+		.version	= CCP_VERSION(3, 0),
+		.name		= "rsa",
+		.driver_name	= "rsa-ccp",
+		.reqsize	= sizeof(struct ccp_rsa_req_ctx),
+		.alg_defaults	= &ccp_rsa_defaults,
+	}
+};
+
+/* Register one RSA variant built from @def's template and track it on @head.
+ * File-local: struct ccp_rsa_def is private and no header declares this.
+ */
+static int ccp_register_rsa_alg(struct list_head *head,
+				const struct ccp_rsa_def *def)
+{
+	struct ccp_crypto_akcipher_alg *ccp_alg;
+	struct akcipher_alg *alg;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+	INIT_LIST_HEAD(&ccp_alg->entry);
+	alg = &ccp_alg->alg;
+	*alg = *def->alg_defaults;
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->driver_name);
+	ret = crypto_register_akcipher(alg);
+	if (ret) {
+		pr_err("%s akcipher algorithm registration error (%d)\n",
+		       alg->base.cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+	list_add(&ccp_alg->entry, head);
+	return 0;
+}
+
+/* Register every RSA variant the detected CCP version supports on @head */
+int ccp_register_rsa_algs(struct list_head *head)
+{
+	const unsigned int hw_version = ccp_version();
+	const struct ccp_rsa_def *def;
+	int err;
+
+	/* Standard-mode RSA; this works for CCP v3 and later */
+	for (def = rsa_algs; def < rsa_algs + ARRAY_SIZE(rsa_algs); def++) {
+		if (def->version <= hw_version) {
+			err = ccp_register_rsa_alg(head, def);
+			if (err)
+				return err;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
new file mode 100644
index 000000000..2ca64bb57
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -0,0 +1,539 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) SHA crypto API support
+ *
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/scatterlist.h>
+#include <linux/crypto.h>
+#include <crypto/algapi.h>
+#include <crypto/hash.h>
+#include <crypto/hmac.h>
+#include <crypto/internal/hash.h>
+#include <crypto/sha.h>
+#include <crypto/scatterwalk.h>
+
+#include "ccp-crypto.h"
+
+/* Completion hook for SHA commands.  On success the unhashed tail is moved
+ * into the request buffer and, for a final pass, the digest is copied out.
+ * The data scatterlist table is released whatever the outcome.
+ */
+static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
+{
+	struct ahash_request *req = ahash_request_cast(async_req);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	unsigned int digest_size =
+		crypto_ahash_digestsize(crypto_ahash_reqtfm(req));
+
+	if (!ret) {
+		unsigned int tail_off = rctx->nbytes - rctx->hash_rem;
+
+		/* Save remaining data to buffer; it sits at the end of src */
+		if (rctx->hash_rem)
+			scatterwalk_map_and_copy(rctx->buf, rctx->src,
+						 tail_off, rctx->hash_rem, 0);
+		/* Zero when nothing was left over */
+		rctx->buf_count = rctx->hash_rem;
+
+		/* Update result area if supplied */
+		if (req->result && rctx->final)
+			memcpy(req->result, rctx->ctx, digest_size);
+	}
+
+	sg_free_table(&rctx->data_sg);
+
+	return ret;
+}
+
+static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
+			     unsigned int final)
+{
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct scatterlist *sg;
+	unsigned int block_size =
+		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
+	unsigned int sg_count;
+	gfp_t gfp;
+	u64 len;
+	int ret;
+
+	len = (u64)rctx->buf_count + (u64)nbytes;
+	/* Non-final data that still fits in one block is only buffered */
+	if (!final && (len <= block_size)) {
+		scatterwalk_map_and_copy(rctx->buf + rctx->buf_count, req->src,
+					 0, nbytes, 0);
+		rctx->buf_count += nbytes;
+
+		return 0;
+	}
+	/* src/nbytes are read again by ccp_sha_complete() to save the tail */
+	rctx->src = req->src;
+	rctx->nbytes = nbytes;
+	/* hash_rem = bytes held back (mask assumes power-of-2 block size) */
+	rctx->final = final;
+	rctx->hash_rem = final ? 0 : len & (block_size - 1);
+	rctx->hash_cnt = len - rctx->hash_rem;
+	if (!final && !rctx->hash_rem) {
+		/* CCP can't do zero length final, so keep some data around */
+		rctx->hash_cnt -= block_size;
+		rctx->hash_rem = block_size;
+	}
+
+	/* Initialize the context scatterlist */
+	sg_init_one(&rctx->ctx_sg, rctx->ctx, sizeof(rctx->ctx));
+
+	sg = NULL;
+	if (rctx->buf_count && nbytes) {
+		/* Build the data scatterlist table - allocate enough entries
+		 * for both data pieces (buffer and input data)
+		 */
+		gfp = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
+			GFP_KERNEL : GFP_ATOMIC;
+		sg_count = sg_nents(req->src) + 1;
+		ret = sg_alloc_table(&rctx->data_sg, sg_count, gfp);
+		if (ret)
+			return ret;
+
+		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, &rctx->buf_sg);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
+		sg = ccp_crypto_sg_table_add(&rctx->data_sg, req->src);
+		if (!sg) {
+			ret = -EINVAL;
+			goto e_free;
+		}
+		sg_mark_end(sg);
+
+		sg = rctx->data_sg.sgl;
+	} else if (rctx->buf_count) {	/* only previously buffered data */
+		sg_init_one(&rctx->buf_sg, rctx->buf, rctx->buf_count);
+
+		sg = &rctx->buf_sg;
+	} else if (nbytes) {		/* only new input data */
+		sg = req->src;
+	}
+
+	rctx->msg_bits += (rctx->hash_cnt << 3);	/* Total in bits */
+
+	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
+	INIT_LIST_HEAD(&rctx->cmd.entry);
+	rctx->cmd.engine = CCP_ENGINE_SHA;
+	rctx->cmd.u.sha.type = rctx->type;
+	rctx->cmd.u.sha.ctx = &rctx->ctx_sg;
+
+	switch (rctx->type) {
+	case CCP_SHA_TYPE_1:
+		rctx->cmd.u.sha.ctx_len = SHA1_DIGEST_SIZE;
+		break;
+	case CCP_SHA_TYPE_224:
+		rctx->cmd.u.sha.ctx_len = SHA224_DIGEST_SIZE;
+		break;
+	case CCP_SHA_TYPE_256:
+		rctx->cmd.u.sha.ctx_len = SHA256_DIGEST_SIZE;
+		break;
+	case CCP_SHA_TYPE_384:
+		rctx->cmd.u.sha.ctx_len = SHA384_DIGEST_SIZE;
+		break;
+	case CCP_SHA_TYPE_512:
+		rctx->cmd.u.sha.ctx_len = SHA512_DIGEST_SIZE;
+		break;
+	default:
+		/* Should never get here */
+		break;
+	}
+
+	rctx->cmd.u.sha.src = sg;
+	rctx->cmd.u.sha.src_len = rctx->hash_cnt;
+	rctx->cmd.u.sha.opad = ctx->u.sha.key_len ?
+		&ctx->u.sha.opad_sg : NULL;
+	rctx->cmd.u.sha.opad_len = ctx->u.sha.key_len ?
+		ctx->u.sha.opad_count : 0;
+	rctx->cmd.u.sha.first = rctx->first;
+	rctx->cmd.u.sha.final = rctx->final;
+	rctx->cmd.u.sha.msg_bits = rctx->msg_bits;
+	/* Later commands for this request are continuations */
+	rctx->first = 0;
+
+	ret = ccp_crypto_enqueue_request(&req->base, &rctx->cmd);
+	/* NOTE(review): on enqueue failure data_sg is not freed here - confirm */
+	return ret;
+
+e_free:
+	sg_free_table(&rctx->data_sg);
+
+	return ret;
+}
+
+/* .init: reset the per-request hashing state for a new message.
+ * Keyed (HMAC) transforms start with the ipad block already buffered.
+ */
+static int ccp_sha_init(struct ahash_request *req)
+{
+	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
+	struct crypto_tfm *base = crypto_ahash_tfm(ahash);
+	struct ccp_sha_req_ctx *state = ahash_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx(ahash);
+	unsigned int blk = crypto_tfm_alg_blocksize(base);
+
+	memset(state, 0, sizeof(*state));
+	state->type = ccp_crypto_ahash_alg(base)->type;
+	state->first = 1;
+
+	/* Buffer the HMAC key for first update */
+	if (ctx->u.sha.key_len) {
+		memcpy(state->buf, ctx->u.sha.ipad, blk);
+		state->buf_count = blk;
+	}
+
+	return 0;
+}
+
+static int ccp_sha_update(struct ahash_request *req)
+{
+	return ccp_do_sha_update(req, req->nbytes, 0);	/* more data, not final */
+}
+
+static int ccp_sha_final(struct ahash_request *req)
+{
+	return ccp_do_sha_update(req, 0, 1);	/* no new data, final pass */
+}
+
+static int ccp_sha_finup(struct ahash_request *req)
+{
+	return ccp_do_sha_update(req, req->nbytes, 1);	/* last data + final */
+}
+
+/* .digest: one-shot hash, i.e. init followed by finup on the same request */
+static int ccp_sha_digest(struct ahash_request *req)
+{
+	int err = ccp_sha_init(req);
+
+	if (err)
+		return err;
+
+	return ccp_sha_finup(req);
+}
+
+/* .export: snapshot the resumable part of the request state into @out */
+static int ccp_sha_export(struct ahash_request *req, void *out)
+{
+	const struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_sha_exp_ctx snap;
+
+	/* Don't let anything leak to 'out' */
+	memset(&snap, 0, sizeof(snap));
+	snap.type = rctx->type;
+	snap.first = rctx->first;
+	snap.msg_bits = rctx->msg_bits;
+	snap.buf_count = rctx->buf_count;
+	memcpy(snap.ctx, rctx->ctx, sizeof(snap.ctx));
+	memcpy(snap.buf, rctx->buf, sizeof(snap.buf));
+
+	/* 'out' may not be aligned so memcpy from local variable */
+	memcpy(out, &snap, sizeof(snap));
+
+	return 0;
+}
+
+/* .import: rebuild the request state from a snapshot made by export */
+static int ccp_sha_import(struct ahash_request *req, const void *in)
+{
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_sha_exp_ctx snap;
+
+	/* 'in' may not be aligned so memcpy to local variable */
+	memcpy(&snap, in, sizeof(snap));
+	memset(rctx, 0, sizeof(*rctx));
+	rctx->type = snap.type;
+	rctx->first = snap.first;
+	rctx->msg_bits = snap.msg_bits;
+	rctx->buf_count = snap.buf_count;
+	memcpy(rctx->ctx, snap.ctx, sizeof(rctx->ctx));
+	memcpy(rctx->buf, snap.buf, sizeof(rctx->buf));
+
+	return 0;
+}
+
+static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
+			  unsigned int key_len)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct crypto_shash *shash = ctx->u.sha.hmac_tfm;
+	/* Descriptor for digesting keys longer than one block (see below) */
+	SHASH_DESC_ON_STACK(sdesc, shash);
+
+	unsigned int block_size = crypto_shash_blocksize(shash);
+	unsigned int digest_size = crypto_shash_digestsize(shash);
+	int i, ret;
+
+	/* Set to zero until complete */
+	ctx->u.sha.key_len = 0;
+
+	/* Clear key area to provide zero padding for keys smaller
+	 * than the block size
+	 */
+	memset(ctx->u.sha.key, 0, sizeof(ctx->u.sha.key));
+
+	if (key_len > block_size) {
+		/* Must hash the input key */
+		sdesc->tfm = shash;
+		sdesc->flags = crypto_ahash_get_flags(tfm) &
+			CRYPTO_TFM_REQ_MAY_SLEEP;
+
+		ret = crypto_shash_digest(sdesc, key, key_len,
+					  ctx->u.sha.key);
+		if (ret) {
+			crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+			return -EINVAL;	/* NOTE(review): 'ret' itself is dropped */
+		}
+
+		key_len = digest_size;
+	} else {
+		memcpy(ctx->u.sha.key, key, key_len);
+	}
+	/* Derive the HMAC inner and outer pads from the zero-padded key */
+	for (i = 0; i < block_size; i++) {
+		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ HMAC_IPAD_VALUE;
+		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ HMAC_OPAD_VALUE;
+	}
+
+	sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size);
+	ctx->u.sha.opad_count = block_size;
+	/* A non-zero key_len is what ccp_sha_init() tests to enable HMAC */
+	ctx->u.sha.key_len = key_len;
+
+	return 0;
+}
+
+/* cra_init: start unkeyed, install the SHA completion hook, size req ctx */
+static int ccp_sha_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	ctx->u.sha.key_len = 0;
+	ctx->complete = ccp_sha_complete;
+	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
+				 sizeof(struct ccp_sha_req_ctx));
+
+	return 0;
+}
+
+static void ccp_sha_cra_exit(struct crypto_tfm *tfm)
+{	/* intentionally empty: ccp_sha_cra_init() acquires nothing */
+}
+
+static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm);
+	struct crypto_shash *hmac_tfm;
+
+	hmac_tfm = crypto_alloc_shash(alg->child_alg, 0, 0); /* e.g. "sha256" */
+	if (IS_ERR(hmac_tfm)) {
+		pr_warn("could not load driver %s need for HMAC support\n",
+			alg->child_alg);
+		return PTR_ERR(hmac_tfm);
+	}
+	/* Kept for ccp_sha_setkey(); released in ccp_hmac_sha_cra_exit() */
+	ctx->u.sha.hmac_tfm = hmac_tfm;
+
+	return ccp_sha_cra_init(tfm);
+}
+
+static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm)
+{
+	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	/* hmac_tfm is the shash allocated by ccp_hmac_sha_cra_init() */
+	if (ctx->u.sha.hmac_tfm)
+		crypto_free_shash(ctx->u.sha.hmac_tfm);
+
+	ccp_sha_cra_exit(tfm);
+}
+
+struct ccp_sha_def {
+	unsigned int version;	/* entry is skipped if above ccp_version() */
+	const char *name;	/* cra_name; also the HMAC child algorithm */
+	const char *drv_name;	/* cra_driver_name ("hmac-" prefixed for HMAC) */
+	enum ccp_sha_type type;	/* engine type copied into each request */
+	u32 digest_size;	/* becomes halg->digestsize */
+	u32 block_size;		/* becomes cra_blocksize */
+};
+
+static struct ccp_sha_def sha_algs[] = {	/* walked by ccp_register_sha_algs() */
+	{
+		.version	= CCP_VERSION(3, 0),
+		.name		= "sha1",
+		.drv_name	= "sha1-ccp",
+		.type		= CCP_SHA_TYPE_1,
+		.digest_size	= SHA1_DIGEST_SIZE,
+		.block_size	= SHA1_BLOCK_SIZE,
+	},
+	{
+		.version	= CCP_VERSION(3, 0),
+		.name		= "sha224",
+		.drv_name	= "sha224-ccp",
+		.type		= CCP_SHA_TYPE_224,
+		.digest_size	= SHA224_DIGEST_SIZE,
+		.block_size	= SHA224_BLOCK_SIZE,
+	},
+	{
+		.version	= CCP_VERSION(3, 0),
+		.name		= "sha256",
+		.drv_name	= "sha256-ccp",
+		.type		= CCP_SHA_TYPE_256,
+		.digest_size	= SHA256_DIGEST_SIZE,
+		.block_size	= SHA256_BLOCK_SIZE,
+	},
+	{
+		.version	= CCP_VERSION(5, 0),	/* SHA-384/512 need v5 */
+		.name		= "sha384",
+		.drv_name	= "sha384-ccp",
+		.type		= CCP_SHA_TYPE_384,
+		.digest_size	= SHA384_DIGEST_SIZE,
+		.block_size	= SHA384_BLOCK_SIZE,
+	},
+	{
+		.version	= CCP_VERSION(5, 0),
+		.name		= "sha512",
+		.drv_name	= "sha512-ccp",
+		.type		= CCP_SHA_TYPE_512,
+		.digest_size	= SHA512_DIGEST_SIZE,
+		.block_size	= SHA512_BLOCK_SIZE,
+	},
+};
+
+/* Register "hmac(<name>)" as a keyed clone of the already registered
+ * @base_alg, remembering <name> as the child used to hash over-long keys.
+ */
+static int ccp_register_hmac_alg(struct list_head *head,
+				 const struct ccp_sha_def *def,
+				 const struct ccp_crypto_ahash_alg *base_alg)
+{
+	struct ccp_crypto_ahash_alg *ccp_alg;
+	struct ahash_alg *alg;
+	struct crypto_alg *base;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	/* Copy the base algorithm and only change what's necessary */
+	*ccp_alg = *base_alg;
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	/* snprintf() always NUL-terminates, which strncpy() does not promise */
+	snprintf(ccp_alg->child_alg, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+
+	alg = &ccp_alg->alg;
+	alg->setkey = ccp_sha_setkey;
+
+	base = &alg->halg.base;
+	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "hmac(%s)", def->name);
+	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "hmac-%s",
+		 def->drv_name);
+	base->cra_init = ccp_hmac_sha_cra_init;
+	base->cra_exit = ccp_hmac_sha_cra_exit;
+
+	ret = crypto_register_ahash(alg);
+	if (ret) {
+		pr_err("%s ahash algorithm registration error (%d)\n",
+		       base->cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+	return ret;
+}
+
+static int ccp_register_sha_alg(struct list_head *head,
+				const struct ccp_sha_def *def)
+{
+	struct ccp_crypto_ahash_alg *ccp_alg;
+	struct ahash_alg *alg;
+	struct hash_alg_common *halg;
+	struct crypto_alg *base;
+	int ret;
+
+	ccp_alg = kzalloc(sizeof(*ccp_alg), GFP_KERNEL);
+	if (!ccp_alg)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&ccp_alg->entry);
+
+	ccp_alg->type = def->type;
+	/* These entry points are inherited by the HMAC clone made below */
+	alg = &ccp_alg->alg;
+	alg->init = ccp_sha_init;
+	alg->update = ccp_sha_update;
+	alg->final = ccp_sha_final;
+	alg->finup = ccp_sha_finup;
+	alg->digest = ccp_sha_digest;
+	alg->export = ccp_sha_export;
+	alg->import = ccp_sha_import;
+
+	halg = &alg->halg;
+	halg->digestsize = def->digest_size;
+	halg->statesize = sizeof(struct ccp_sha_exp_ctx);
+
+	base = &halg->base;
+	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->drv_name);
+	base->cra_flags = CRYPTO_ALG_ASYNC |
+			  CRYPTO_ALG_KERN_DRIVER_ONLY |
+			  CRYPTO_ALG_NEED_FALLBACK; /* NOTE(review): no fallback tfm here */
+	base->cra_blocksize = def->block_size;
+	base->cra_ctxsize = sizeof(struct ccp_ctx);
+	base->cra_priority = CCP_CRA_PRIORITY;
+	base->cra_init = ccp_sha_cra_init;
+	base->cra_exit = ccp_sha_cra_exit;
+	base->cra_module = THIS_MODULE;
+
+	ret = crypto_register_ahash(alg);
+	if (ret) {
+		pr_err("%s ahash algorithm registration error (%d)\n",
+		       base->cra_name, ret);
+		kfree(ccp_alg);
+		return ret;
+	}
+
+	list_add(&ccp_alg->entry, head);
+	/* Each SHA variant also gets an hmac(<name>) sibling cloned from it */
+	ret = ccp_register_hmac_alg(head, def, ccp_alg);
+
+	return ret;
+}
+
+/* Register each SHA variant (and its HMAC sibling) that the detected CCP
+ * version supports, stopping at and returning the first error. */
+int ccp_register_sha_algs(struct list_head *head)
+{
+	const struct ccp_sha_def *def, *end = sha_algs + ARRAY_SIZE(sha_algs);
+	const unsigned int hw_version = ccp_version();
+	int err = 0;
+
+	for (def = sha_algs; def < end && !err; def++) {
+		if (def->version <= hw_version)
+			err = ccp_register_sha_alg(head, def);
+	}
+
+	return err;
+}
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
new file mode 100644
index 000000000..b9fd090c4
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -0,0 +1,285 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) crypto API support
+ *
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CCP_CRYPTO_H__
+#define __CCP_CRYPTO_H__
+
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/pci.h>
+#include <linux/ccp.h>
+#include <crypto/algapi.h>
+#include <crypto/aes.h>
+#include <crypto/internal/aead.h>
+#include <crypto/aead.h>
+#include <crypto/ctr.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+#include <crypto/akcipher.h>
+#include <crypto/internal/rsa.h>
+
+#define	CCP_LOG_LEVEL	KERN_INFO
+
+#define CCP_CRA_PRIORITY	300	/* cra_priority of the CCP algorithms */
+
+struct ccp_crypto_ablkcipher_alg {
+	struct list_head entry;	/* link in the driver's cipher_algs list */
+
+	u32 mode;
+
+	struct crypto_alg alg;	/* passed to crypto_unregister_alg() */
+};
+
+struct ccp_crypto_aead {
+	struct list_head entry;	/* link in the driver's aead_algs list */
+
+	u32 mode;
+
+	struct aead_alg alg;	/* passed to crypto_unregister_aead() */
+};
+
+struct ccp_crypto_ahash_alg {
+	struct list_head entry;	/* link in the driver's hash_algs list */
+
+	const __be32 *init;
+	u32 type;		/* SHA code copies this into each request */
+	u32 mode;
+
+	/* Child algorithm used for HMAC, CMAC, etc */
+	char child_alg[CRYPTO_MAX_ALG_NAME];
+
+	struct ahash_alg alg;	/* registered via crypto_register_ahash() */
+};
+
+struct ccp_crypto_akcipher_alg {
+	struct list_head entry;	/* link in the driver's akcipher_algs list */
+
+	struct akcipher_alg alg;	/* copy of the RSA template, then registered */
+};
+
+/* Map a transform to the CCP cipher wrapper that embeds its crypto_alg */
+static inline struct ccp_crypto_ablkcipher_alg *
+	ccp_crypto_ablkcipher_alg(struct crypto_tfm *tfm)
+{
+	return container_of(tfm->__crt_alg,
+			    struct ccp_crypto_ablkcipher_alg, alg);
+}
+
+/* Map a transform to the ccp_crypto_ahash_alg wrapper of its algorithm */
+static inline struct ccp_crypto_ahash_alg *
+	ccp_crypto_ahash_alg(struct crypto_tfm *tfm)
+{
+	struct ahash_alg *ahash_alg;
+
+	/* crypto_alg -> enclosing ahash_alg -> enclosing CCP wrapper */
+	ahash_alg = container_of(tfm->__crt_alg, struct ahash_alg, halg.base);
+	return container_of(ahash_alg, struct ccp_crypto_ahash_alg, alg);
+}
+
+/***** AES related defines *****/
+struct ccp_aes_ctx {
+	/* Fallback cipher for XTS with unsupported unit sizes */
+	struct crypto_skcipher *tfm_skcipher;
+
+	/* Cipher used to generate CMAC K1/K2 keys */
+	struct crypto_cipher *tfm_cipher;
+
+	enum ccp_engine engine;
+	enum ccp_aes_type type;
+	enum ccp_aes_mode mode;
+
+	struct scatterlist key_sg;	/* presumably maps key[] - confirm */
+	unsigned int key_len;
+	u8 key[AES_MAX_KEY_SIZE * 2];	/* NOTE(review): 2x, presumably for XTS */
+
+	u8 nonce[CTR_RFC3686_NONCE_SIZE];
+
+	/* CMAC key structures */
+	struct scatterlist k1_sg;
+	struct scatterlist k2_sg;
+	unsigned int kn_len;
+	u8 k1[AES_BLOCK_SIZE];
+	u8 k2[AES_BLOCK_SIZE];
+};
+
+struct ccp_aes_req_ctx {
+	struct scatterlist iv_sg;	/* presumably maps iv[] - confirm */
+	u8 iv[AES_BLOCK_SIZE];
+
+	struct scatterlist tag_sg;	/* presumably maps tag[] - confirm */
+	u8 tag[AES_BLOCK_SIZE];
+
+	/* Fields used for RFC3686 requests */
+	u8 *rfc3686_info;
+	u8 rfc3686_iv[AES_BLOCK_SIZE];
+
+	struct ccp_cmd cmd;	/* per-request CCP command, as in SHA/RSA */
+};
+
+struct ccp_aes_cmac_req_ctx {
+	unsigned int null_msg;
+	unsigned int final;
+
+	struct scatterlist *src;	/* same src/nbytes, hash_cnt/hash_rem, */
+	unsigned int nbytes;		/* data_sg and buf members as found in */
+					/* struct ccp_sha_req_ctx */
+	u64 hash_cnt;
+	unsigned int hash_rem;
+
+	struct sg_table data_sg;
+
+	struct scatterlist iv_sg;
+	u8 iv[AES_BLOCK_SIZE];
+
+	struct scatterlist buf_sg;
+	unsigned int buf_count;
+	u8 buf[AES_BLOCK_SIZE];
+
+	struct scatterlist pad_sg;
+	unsigned int pad_count;
+	u8 pad[AES_BLOCK_SIZE];
+
+	struct ccp_cmd cmd;
+};
+
+struct ccp_aes_cmac_exp_ctx {	/* subset of struct ccp_aes_cmac_req_ctx */
+	unsigned int null_msg;
+
+	u8 iv[AES_BLOCK_SIZE];
+
+	unsigned int buf_count;
+	u8 buf[AES_BLOCK_SIZE];
+};
+
+/***** 3DES related defines *****/
+struct ccp_des3_ctx {
+	enum ccp_engine engine;
+	enum ccp_des3_type type;
+	enum ccp_des3_mode mode;
+
+	struct scatterlist key_sg;	/* presumably maps key[] - confirm */
+	unsigned int key_len;
+	u8 key[AES_MAX_KEY_SIZE];	/* NOTE(review): sized by the AES constant */
+};
+
+struct ccp_des3_req_ctx {
+	struct scatterlist iv_sg;	/* presumably maps iv[] - confirm */
+	u8 iv[AES_BLOCK_SIZE];		/* NOTE(review): sized by the AES constant */
+
+	struct ccp_cmd cmd;
+};
+
+/* SHA-related defines
+ * These values must be large enough to accommodate any variant
+ */
+#define MAX_SHA_CONTEXT_SIZE	SHA512_DIGEST_SIZE	/* sizes the ctx[] arrays */
+#define MAX_SHA_BLOCK_SIZE	SHA512_BLOCK_SIZE	/* sizes buf/key/ipad/opad */
+
+struct ccp_sha_ctx {
+	struct scatterlist opad_sg;	/* single entry over opad[], set by setkey */
+	unsigned int opad_count;	/* set to the block size by setkey */
+
+	unsigned int key_len;		/* 0 means unkeyed (plain SHA) */
+	u8 key[MAX_SHA_BLOCK_SIZE];	/* zero padded; long keys are pre-hashed */
+	u8 ipad[MAX_SHA_BLOCK_SIZE];	/* key ^ HMAC_IPAD_VALUE */
+	u8 opad[MAX_SHA_BLOCK_SIZE];	/* key ^ HMAC_OPAD_VALUE */
+	struct crypto_shash *hmac_tfm;	/* shash of the child alg, HMAC only */
+};
+
+struct ccp_sha_req_ctx {
+	enum ccp_sha_type type;
+
+	u64 msg_bits;		/* running total of bits submitted for hashing */
+
+	unsigned int first;	/* 1 until the first command has been built */
+	unsigned int final;	/* 'final' argument of the current pass */
+
+	struct scatterlist *src;	/* input of the current pass, re-read */
+	unsigned int nbytes;		/* by the completion to save the tail */
+
+	u64 hash_cnt;		/* bytes handed to the CCP in this pass */
+	unsigned int hash_rem;	/* bytes held back for the next pass */
+
+	struct sg_table data_sg;	/* joins buf and src when both hold data */
+
+	struct scatterlist ctx_sg;
+	u8 ctx[MAX_SHA_CONTEXT_SIZE];	/* digest is copied from here on final */
+
+	struct scatterlist buf_sg;
+	unsigned int buf_count;
+	u8 buf[MAX_SHA_BLOCK_SIZE];	/* carried-over data; HMAC ipad after init */
+
+	/* CCP driver command */
+	struct ccp_cmd cmd;
+};
+
+struct ccp_sha_exp_ctx {	/* fields saved/restored by export and import */
+	enum ccp_sha_type type;
+
+	u64 msg_bits;
+
+	unsigned int first;
+
+	u8 ctx[MAX_SHA_CONTEXT_SIZE];
+
+	unsigned int buf_count;
+	u8 buf[MAX_SHA_BLOCK_SIZE];
+};
+
+/***** RSA related defines *****/
+
+struct ccp_rsa_ctx {
+	unsigned int key_len; /* in bits */
+	struct scatterlist e_sg;	/* single entry over e_buf */
+	u8 *e_buf;			/* public exponent, leading zeros removed */
+	unsigned int e_len;		/* bytes in e_buf */
+	struct scatterlist n_sg;	/* single entry over n_buf */
+	u8 *n_buf;			/* modulus, leading zeros removed */
+	unsigned int n_len;		/* bytes in n_buf; key_len = n_len * 8 */
+	struct scatterlist d_sg;	/* single entry over d_buf */
+	u8 *d_buf;			/* private exponent; NULL for public keys */
+	unsigned int d_len;		/* bytes in d_buf */
+};
+
+struct ccp_rsa_req_ctx {
+	struct ccp_cmd cmd;	/* rebuilt for every encrypt/decrypt request */
+};
+
+#define	CCP_RSA_MAXMOD	(4 * 1024 / 8)	/* bytes; NOTE(review): unused by */
+#define	CCP5_RSA_MAXMOD	(16 * 1024 / 8)	/* ccp_check_key_length() - confirm */
+
+/***** Common Context Structure *****/
+struct ccp_ctx {
+	int (*complete)(struct crypto_async_request *req, int ret); /* set at init */
+
+	union {	/* the RSA and SHA code each use only their own member */
+		struct ccp_aes_ctx aes;
+		struct ccp_rsa_ctx rsa;
+		struct ccp_sha_ctx sha;
+		struct ccp_des3_ctx des3;
+	} u;
+};
+
+int ccp_crypto_enqueue_request(struct crypto_async_request *req,
+			       struct ccp_cmd *cmd);
+struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table,
+					    struct scatterlist *sg_add);
+/* Registration entry points, one per algorithm family */
+int ccp_register_aes_algs(struct list_head *head);
+int ccp_register_aes_cmac_algs(struct list_head *head);
+int ccp_register_aes_xts_algs(struct list_head *head);
+int ccp_register_aes_aeads(struct list_head *head);
+int ccp_register_sha_algs(struct list_head *head);
+int ccp_register_des3_algs(struct list_head *head);
+int ccp_register_rsa_algs(struct list_head *head);
+
+#endif
diff --git a/drivers/crypto/ccp/ccp-debugfs.c b/drivers/crypto/ccp/ccp-debugfs.c
new file mode 100644
index 000000000..1a734bd20
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-debugfs.c
@@ -0,0 +1,348 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+/* DebugFS helpers
+ *
+ * These macros rely on two locals of the calling function: "obuf", the
+ * output buffer of OBUFLEN bytes, and "oboff", the number of bytes
+ * already written into it. OSCNPRINTF() appends at obuf + oboff and is
+ * bounded by the space that is left; callers add its return value to
+ * oboff.
+ */
+#define	OBUFP		(obuf + oboff)
+#define	OBUFLEN		512
+#define	OBUFSPC		(OBUFLEN - oboff)
+#define	OSCNPRINTF(fmt, ...) \
+		scnprintf(OBUFP, OBUFSPC, fmt, ## __VA_ARGS__)
+
+#define BUFLEN	63
+
+/* Bit fields of the CMD5_PSP_CCP_VERSION register: a version number,
+ * one "present" flag per engine, and two multi-bit counts (queues and
+ * LSB entries) that have to be masked and then shifted down.
+ */
+#define	RI_VERSION_NUM	0x0000003F
+#define	RI_AES_PRESENT	0x00000040
+#define	RI_3DES_PRESENT	0x00000080
+#define	RI_SHA_PRESENT	0x00000100
+#define	RI_RSA_PRESENT	0x00000200
+#define	RI_ECC_PRESENT	0x00000400
+#define	RI_ZDE_PRESENT	0x00000800
+#define	RI_ZCE_PRESENT	0x00001000
+#define	RI_TRNG_PRESENT	0x00002000
+#define	RI_ELFC_PRESENT	0x00004000
+#define	RI_ELFC_SHIFT	14
+#define	RI_NUM_VQM	0x00078000
+#define	RI_NVQM_SHIFT	15
+/* Extract the queue count: mask the field (bitwise AND), then shift */
+#define	RI_NVQM(r)	(((r) & RI_NUM_VQM) >> RI_NVQM_SHIFT)
+#define	RI_LSB_ENTRIES	0x0FF80000
+#define	RI_NLSB_SHIFT	19
+/* Extract the LSB entry count: mask the field, then shift */
+#define	RI_NLSB(r)	(((r) & RI_LSB_ENTRIES) >> RI_NLSB_SHIFT)
+
+/* Format the device summary (names, queue and command counts, and the
+ * decoded CMD5_PSP_CCP_VERSION register) and copy it to user space.
+ *
+ * Returns 0 when the file carries no device, -ENOMEM when the scratch
+ * buffer cannot be allocated, otherwise the result of
+ * simple_read_from_buffer().
+ */
+static ssize_t ccp5_debugfs_info_read(struct file *filp, char __user *ubuf,
+				      size_t count, loff_t *offp)
+{
+	/* Engine-present flags of the version register, in print order */
+	static const struct {
+		unsigned int flag;
+		const char *tag;
+	} engines[] = {
+		{ RI_AES_PRESENT,  " AES"  },
+		{ RI_3DES_PRESENT, " 3DES" },
+		{ RI_SHA_PRESENT,  " SHA"  },
+		{ RI_RSA_PRESENT,  " RSA"  },
+		{ RI_ECC_PRESENT,  " ECC"  },
+		{ RI_ZDE_PRESENT,  " ZDE"  },
+		{ RI_ZCE_PRESENT,  " ZCE"  },
+		{ RI_TRNG_PRESENT, " TRNG" },
+	};
+	struct ccp_device *ccp = filp->private_data;
+	unsigned int oboff = 0;
+	unsigned int regval;
+	unsigned int idx;
+	ssize_t ret;
+	char *obuf;
+
+	if (!ccp)
+		return 0;
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	/* Software view of the device */
+	oboff += OSCNPRINTF("Device name: %s\n", ccp->name);
+	oboff += OSCNPRINTF("   RNG name: %s\n", ccp->rngname);
+	oboff += OSCNPRINTF("   # Queues: %d\n", ccp->cmd_q_count);
+	oboff += OSCNPRINTF("     # Cmds: %d\n", ccp->cmd_count);
+
+	/* Hardware view, decoded from the version register */
+	regval = ioread32(ccp->io_regs + CMD5_PSP_CCP_VERSION);
+	oboff += OSCNPRINTF("    Version: %d\n", regval & RI_VERSION_NUM);
+	oboff += OSCNPRINTF("    Engines:");
+	for (idx = 0; idx < ARRAY_SIZE(engines); idx++)
+		if (regval & engines[idx].flag)
+			oboff += OSCNPRINTF("%s", engines[idx].tag);
+	oboff += OSCNPRINTF("\n");
+	oboff += OSCNPRINTF("     Queues: %d\n",
+		   (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT);
+	oboff += OSCNPRINTF("LSB Entries: %d\n",
+		   (regval & RI_LSB_ENTRIES) >> RI_NLSB_SHIFT);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Return a formatted buffer containing the current
+ * statistics across all queues for a CCP.
+ *
+ * The per-queue counters are summed first and then printed together
+ * with the device interrupt count. Returns 0 when the file carries no
+ * device, -ENOMEM when the scratch buffer cannot be allocated,
+ * otherwise the result of simple_read_from_buffer().
+ */
+static ssize_t ccp5_debugfs_stats_read(struct file *filp, char __user *ubuf,
+				       size_t count, loff_t *offp)
+{
+	struct ccp_device *ccp = filp->private_data;
+	unsigned long total_xts_aes_ops = 0;
+	unsigned long total_3des_ops = 0;
+	unsigned long total_aes_ops = 0;
+	unsigned long total_sha_ops = 0;
+	unsigned long total_rsa_ops = 0;
+	unsigned long total_ecc_ops = 0;
+	unsigned long total_pt_ops = 0;
+	unsigned long total_ops = 0;
+	unsigned int oboff = 0;
+	ssize_t ret = 0;
+	unsigned int i;
+	char *obuf;
+
+	/* Same guard as the info and per-queue read handlers */
+	if (!ccp)
+		return 0;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+		total_ops += cmd_q->total_ops;
+		total_aes_ops += cmd_q->total_aes_ops;
+		total_xts_aes_ops += cmd_q->total_xts_aes_ops;
+		total_3des_ops += cmd_q->total_3des_ops;
+		total_sha_ops += cmd_q->total_sha_ops;
+		total_rsa_ops += cmd_q->total_rsa_ops;
+		total_pt_ops += cmd_q->total_pt_ops;
+		total_ecc_ops += cmd_q->total_ecc_ops;
+	}
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	oboff += OSCNPRINTF("Total Interrupts Handled: %ld\n",
+			    ccp->total_interrupts);
+	oboff += OSCNPRINTF("        Total Operations: %ld\n",
+			    total_ops);
+	oboff += OSCNPRINTF("                     AES: %ld\n",
+			    total_aes_ops);
+	oboff += OSCNPRINTF("                 XTS AES: %ld\n",
+			    total_xts_aes_ops);
+	/* The 3DES count gets its own label instead of a second "SHA" */
+	oboff += OSCNPRINTF("                    3DES: %ld\n",
+			    total_3des_ops);
+	oboff += OSCNPRINTF("                     SHA: %ld\n",
+			    total_sha_ops);
+	oboff += OSCNPRINTF("                     RSA: %ld\n",
+			    total_rsa_ops);
+	oboff += OSCNPRINTF("               Pass-Thru: %ld\n",
+			    total_pt_ops);
+	oboff += OSCNPRINTF("                     ECC: %ld\n",
+			    total_ecc_ops);
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Zero every operation counter kept for one command queue: the overall
+ * total first, then the per-engine counts.
+ */
+static void ccp5_debugfs_reset_queue_stats(struct ccp_cmd_queue *cmd_q)
+{
+	cmd_q->total_ops = 0;
+
+	cmd_q->total_aes_ops = 0;
+	cmd_q->total_xts_aes_ops = 0;
+	cmd_q->total_3des_ops = 0;
+	cmd_q->total_sha_ops = 0;
+	cmd_q->total_rsa_ops = 0;
+	cmd_q->total_pt_ops = 0;
+	cmd_q->total_ecc_ops = 0;
+}
+
+/* Any write to the device "stats" file resets the counters of every
+ * queue of that device plus the device interrupt count. The written
+ * data itself is never looked at; the whole write is reported as
+ * consumed.
+ */
+static ssize_t ccp5_debugfs_stats_write(struct file *filp,
+					const char __user *ubuf,
+					size_t count, loff_t *offp)
+{
+	struct ccp_device *ccp = filp->private_data;
+	int q = 0;
+
+	while (q < ccp->cmd_q_count) {
+		ccp5_debugfs_reset_queue_stats(&ccp->cmd_q[q]);
+		q++;
+	}
+
+	ccp->total_interrupts = 0L;
+
+	return count;
+}
+
+/* Return a formatted buffer containing the current information
+ * for that queue
+ *
+ * The report lists the queue's operation counters followed by the
+ * interrupt sources enabled in its interrupt-enable register. Returns
+ * 0 when the file carries no queue, -ENOMEM when the scratch buffer
+ * cannot be allocated, otherwise the result of
+ * simple_read_from_buffer().
+ */
+static ssize_t ccp5_debugfs_queue_read(struct file *filp, char __user *ubuf,
+				       size_t count, loff_t *offp)
+{
+	struct ccp_cmd_queue *cmd_q = filp->private_data;
+	unsigned int oboff = 0;
+	unsigned int regval;
+	ssize_t ret;
+	char *obuf;
+
+	if (!cmd_q)
+		return 0;
+
+	obuf = kmalloc(OBUFLEN, GFP_KERNEL);
+	if (!obuf)
+		return -ENOMEM;
+
+	oboff += OSCNPRINTF("  Total Queue Operations: %ld\n",
+			    cmd_q->total_ops);
+	oboff += OSCNPRINTF("                     AES: %ld\n",
+			    cmd_q->total_aes_ops);
+	oboff += OSCNPRINTF("                 XTS AES: %ld\n",
+			    cmd_q->total_xts_aes_ops);
+	/* The 3DES count gets its own label instead of a second "SHA" */
+	oboff += OSCNPRINTF("                    3DES: %ld\n",
+			    cmd_q->total_3des_ops);
+	oboff += OSCNPRINTF("                     SHA: %ld\n",
+			    cmd_q->total_sha_ops);
+	oboff += OSCNPRINTF("                     RSA: %ld\n",
+			    cmd_q->total_rsa_ops);
+	oboff += OSCNPRINTF("               Pass-Thru: %ld\n",
+			    cmd_q->total_pt_ops);
+	oboff += OSCNPRINTF("                     ECC: %ld\n",
+			    cmd_q->total_ecc_ops);
+
+	/* Decode the queue's interrupt-enable register */
+	regval = ioread32(cmd_q->reg_int_enable);
+	oboff += OSCNPRINTF("      Enabled Interrupts:");
+	if (regval & INT_EMPTY_QUEUE)
+		oboff += OSCNPRINTF(" EMPTY");
+	if (regval & INT_QUEUE_STOPPED)
+		oboff += OSCNPRINTF(" STOPPED");
+	if (regval & INT_ERROR)
+		oboff += OSCNPRINTF(" ERROR");
+	if (regval & INT_COMPLETION)
+		oboff += OSCNPRINTF(" COMPLETION");
+	oboff += OSCNPRINTF("\n");
+
+	ret = simple_read_from_buffer(ubuf, count, offp, obuf, oboff);
+	kfree(obuf);
+
+	return ret;
+}
+
+/* Any write to a queue's "stats" file zeroes that queue's counters.
+ * The written data is not examined; the whole write is reported as
+ * consumed.
+ */
+static ssize_t ccp5_debugfs_queue_write(struct file *filp,
+					const char __user *ubuf,
+					size_t count, loff_t *offp)
+{
+	ccp5_debugfs_reset_queue_stats(filp->private_data);
+
+	return count;
+}
+
+/* File operations of the "info" file: readable only, no write handler */
+static const struct file_operations ccp_debugfs_info_ops = {
+	.read	= ccp5_debugfs_info_read,
+	.write	= NULL,
+	.open	= simple_open,
+	.owner	= THIS_MODULE,
+};
+
+/* File operations of a per-queue "stats" file: read reports, write resets */
+static const struct file_operations ccp_debugfs_queue_ops = {
+	.read	= ccp5_debugfs_queue_read,
+	.write	= ccp5_debugfs_queue_write,
+	.open	= simple_open,
+	.owner	= THIS_MODULE,
+};
+
+/* File operations of the device "stats" file: read reports, write resets */
+static const struct file_operations ccp_debugfs_stats_ops = {
+	.read	= ccp5_debugfs_stats_read,
+	.write	= ccp5_debugfs_stats_write,
+	.open	= simple_open,
+	.owner	= THIS_MODULE,
+};
+
+/* Top-level debugfs directory shared by all CCP instances. It is created
+ * on first use by ccp5_debugfs_setup(), which takes ccp_debugfs_lock
+ * around the check-and-create step.
+ */
+static struct dentry *ccp_debugfs_dir;
+static DEFINE_MUTEX(ccp_debugfs_lock);
+
+/* Sizes the name buffer used for the per-queue debugfs directories */
+#define	MAX_NAME_LEN	20
+
+/* Create the debugfs tree for one CCP device:
+ *
+ *   <KBUILD_MODNAME>/<ccp->name>/info
+ *   <KBUILD_MODNAME>/<ccp->name>/stats
+ *   <KBUILD_MODNAME>/<ccp->name>/q<id>/stats	(one per command queue)
+ *
+ * The top-level directory is shared between devices and created on
+ * first use. Nothing is reported to the caller: on any failure the
+ * partially built per-device directory is removed again and
+ * ccp->debugfs_instance is cleared so no stale dentry is left behind.
+ */
+void ccp5_debugfs_setup(struct ccp_device *ccp)
+{
+	struct ccp_cmd_queue *cmd_q;
+	char name[MAX_NAME_LEN + 1];
+	struct dentry *debugfs_info;
+	struct dentry *debugfs_stats;
+	struct dentry *debugfs_q_instance;
+	struct dentry *debugfs_q_stats;
+	int i;
+
+	if (!debugfs_initialized())
+		return;
+
+	/* Only the first device to get here creates the shared directory */
+	mutex_lock(&ccp_debugfs_lock);
+	if (!ccp_debugfs_dir)
+		ccp_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	mutex_unlock(&ccp_debugfs_lock);
+	if (!ccp_debugfs_dir)
+		return;
+
+	ccp->debugfs_instance = debugfs_create_dir(ccp->name, ccp_debugfs_dir);
+	if (!ccp->debugfs_instance)
+		goto err;
+
+	debugfs_info = debugfs_create_file("info", 0400,
+					   ccp->debugfs_instance, ccp,
+					   &ccp_debugfs_info_ops);
+	if (!debugfs_info)
+		goto err;
+
+	debugfs_stats = debugfs_create_file("stats", 0600,
+					    ccp->debugfs_instance, ccp,
+					    &ccp_debugfs_stats_ops);
+	if (!debugfs_stats)
+		goto err;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		/* Bound the write by the real size of the buffer */
+		snprintf(name, sizeof(name), "q%d", cmd_q->id);
+
+		debugfs_q_instance =
+			debugfs_create_dir(name, ccp->debugfs_instance);
+		if (!debugfs_q_instance)
+			goto err;
+
+		debugfs_q_stats =
+			debugfs_create_file("stats", 0600,
+					    debugfs_q_instance, cmd_q,
+					    &ccp_debugfs_queue_ops);
+		if (!debugfs_q_stats)
+			goto err;
+	}
+
+	return;
+
+err:
+	debugfs_remove_recursive(ccp->debugfs_instance);
+	/* The dentry is gone; do not leave a dangling pointer behind */
+	ccp->debugfs_instance = NULL;
+}
+
+/* Remove the whole CCP debugfs tree.
+ *
+ * The shared directory pointer is cleared under ccp_debugfs_lock so a
+ * later ccp5_debugfs_setup() (for instance after a device rebind)
+ * creates a fresh directory instead of reusing a freed dentry.
+ */
+void ccp5_debugfs_destroy(void)
+{
+	mutex_lock(&ccp_debugfs_lock);
+	debugfs_remove_recursive(ccp_debugfs_dir);
+	ccp_debugfs_dir = NULL;
+	mutex_unlock(&ccp_debugfs_lock);
+}
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
new file mode 100644
index 000000000..ae0cc0a4d
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -0,0 +1,602 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/interrupt.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+/* Reserve @count contiguous KSB entries in the device bitmap, sleeping
+ * until entries are released when none are free.
+ *
+ * Returns KSB_START plus the first reserved index, or 0 when the wait
+ * is interrupted.
+ */
+static u32 ccp_alloc_ksb(struct ccp_cmd_queue *cmd_q, unsigned int count)
+{
+	struct ccp_device *ccp = cmd_q->ccp;
+	bool found;
+	int start;
+
+	for (;;) {
+		mutex_lock(&ccp->sb_mutex);
+
+		start = (u32)bitmap_find_next_zero_area(ccp->sb,
+							ccp->sb_count,
+							ccp->sb_start,
+							count, 0);
+		found = start <= ccp->sb_count;
+		if (found)
+			bitmap_set(ccp->sb, start, count);
+		else
+			/* Nothing free; the release path sets this again */
+			ccp->sb_avail = 0;
+
+		mutex_unlock(&ccp->sb_mutex);
+
+		if (found)
+			return KSB_START + start;
+
+		/* Wait for KSB entries to become available */
+		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
+			return 0;
+	}
+}
+
+/* Release @count KSB entries beginning at @start (a value previously
+ * handed out with KSB_START added) and wake every waiter on the KSB
+ * wait queue. A @start of 0 is ignored.
+ */
+static void ccp_free_ksb(struct ccp_cmd_queue *cmd_q, unsigned int start,
+			 unsigned int count)
+{
+	struct ccp_device *ccp = cmd_q->ccp;
+
+	if (start) {
+		mutex_lock(&ccp->sb_mutex);
+		bitmap_clear(ccp->sb, start - KSB_START, count);
+		ccp->sb_avail = 1;
+		mutex_unlock(&ccp->sb_mutex);
+
+		wake_up_interruptible_all(&ccp->sb_queue);
+	}
+}
+
+/* Read the queue's status register and return the CMD_Q_DEPTH() field
+ * extracted from it.
+ * NOTE(review): the comment in ccp_do_cmd() says reading this register
+ * resets it, so calling this may discard pending error information.
+ */
+static unsigned int ccp_get_free_slots(struct ccp_cmd_queue *cmd_q)
+{
+	return CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
+}
+
+/* Submit one request to the device and, if an interrupt on completion
+ * was requested, wait for it.
+ *
+ * @op:       operation descriptor; supplies the queue, job id and the
+ *            soc/ioc flags
+ * @cr:       values for CMD_REQ1 onwards
+ * @cr_count: number of entries in @cr
+ *
+ * Returns 0 on success, the non-zero result of
+ * wait_event_interruptible() if the wait was interrupted, or -EIO if
+ * the queue recorded a command error.
+ */
+static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_device *ccp = cmd_q->ccp;
+	void __iomem *cr_addr;
+	u32 cr0, cmd;
+	unsigned int i;
+	int ret = 0;
+
+	/* We could read a status register to see how many free slots
+	 * are actually available, but reading that register resets it
+	 * and you could lose some error information.
+	 */
+	cmd_q->free_slots--;
+
+	/* CMD_REQ0 identifies the queue and job */
+	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
+	      | (op->jobid << REQ0_JOBID_SHIFT)
+	      | REQ0_WAIT_FOR_WRITE;
+
+	if (op->soc)
+		cr0 |= REQ0_STOP_ON_COMPLETE
+		       | REQ0_INT_ON_COMPLETE;
+
+	/* Also ask for an interrupt when the caller wants one or when the
+	 * software slot count has just reached zero
+	 */
+	if (op->ioc || !cmd_q->free_slots)
+		cr0 |= REQ0_INT_ON_COMPLETE;
+
+	/* Start at CMD_REQ1 */
+	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
+
+	/* req_mutex is held across the whole register sequence */
+	mutex_lock(&ccp->req_mutex);
+
+	/* Write CMD_REQ1 through CMD_REQx first */
+	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
+		iowrite32(*(cr + i), cr_addr);
+
+	/* Tell the CCP to start */
+	wmb();
+	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
+
+	mutex_unlock(&ccp->req_mutex);
+
+	if (cr0 & REQ0_INT_ON_COMPLETE) {
+		/* Wait for the job to complete */
+		ret = wait_event_interruptible(cmd_q->int_queue,
+					       cmd_q->int_rcvd);
+		if (ret || cmd_q->cmd_error) {
+			/* On error delete all related jobs from the queue */
+			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
+			      | op->jobid;
+			if (cmd_q->cmd_error)
+				ccp_log_error(cmd_q->ccp,
+					      cmd_q->cmd_error);
+
+			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
+
+			/* A command error with an uninterrupted wait is
+			 * reported as -EIO
+			 */
+			if (!ret)
+				ret = -EIO;
+		} else if (op->soc) {
+			/* Delete just head job from the queue on SoC */
+			cmd = DEL_Q_ACTIVE
+			      | (cmd_q->id << DEL_Q_ID_SHIFT)
+			      | op->jobid;
+
+			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
+		}
+
+		/* Refresh the slot count from the status value saved by the
+		 * interrupt bottom half
+		 */
+		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
+
+		/* Re-arm the wait condition for the next request */
+		cmd_q->int_rcvd = 0;
+	}
+
+	return ret;
+}
+
+/* Build the REQ1..REQ6 register values for an AES operation described
+ * by @op and hand them to ccp_do_cmd(); returns ccp_do_cmd()'s result.
+ */
+static int ccp_perform_aes(struct ccp_op *op)
+{
+	u32 cr[6];
+	u32 req1;
+
+	/* REQ1: engine, AES type/mode/action and the key KSB index */
+	req1 = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT) |
+	       (op->u.aes.type << REQ1_AES_TYPE_SHIFT) |
+	       (op->u.aes.mode << REQ1_AES_MODE_SHIFT) |
+	       (op->u.aes.action << REQ1_AES_ACTION_SHIFT) |
+	       (op->sb_key << REQ1_KEY_KSB_SHIFT);
+
+	/* REQ2..REQ4: source length minus one, source address, context KSB */
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) |
+		(CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->src.u.dma);
+
+	/* REQ5..REQ6: destination address */
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->dst.u.dma);
+
+	/* Optional REQ1 bits */
+	if (op->u.aes.mode == CCP_AES_MODE_CFB)
+		req1 |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
+	if (op->eom)
+		req1 |= REQ1_EOM;
+	if (op->init)
+		req1 |= REQ1_INIT;
+
+	cr[0] = req1;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+/* Build the REQ1..REQ6 register values for an XTS-AES-128 operation
+ * described by @op and hand them to ccp_do_cmd(); returns
+ * ccp_do_cmd()'s result.
+ */
+static int ccp_perform_xts_aes(struct ccp_op *op)
+{
+	u32 cr[6];
+	u32 req1;
+
+	/* REQ1: engine, action, unit size and the key KSB index */
+	req1 = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT) |
+	       (op->u.xts.action << REQ1_AES_ACTION_SHIFT) |
+	       (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT) |
+	       (op->sb_key << REQ1_KEY_KSB_SHIFT);
+
+	/* REQ2..REQ4: source length minus one, source address, context KSB */
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) |
+		(CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->src.u.dma);
+
+	/* REQ5..REQ6: destination address */
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->dst.u.dma);
+
+	/* Optional REQ1 bits */
+	if (op->eom)
+		req1 |= REQ1_EOM;
+	if (op->init)
+		req1 |= REQ1_INIT;
+
+	cr[0] = req1;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+/* Build the REQ1..REQ6 register values for a SHA operation described
+ * by @op and hand them to ccp_do_cmd(); returns ccp_do_cmd()'s result.
+ */
+static int ccp_perform_sha(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* REQ1: engine and SHA type; REQ1_INIT is always set */
+	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT) |
+		(op->u.sha.type << REQ1_SHA_TYPE_SHIFT) |
+		REQ1_INIT;
+
+	/* REQ2..REQ4: source length minus one, source address, context KSB */
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) |
+		(CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->src.u.dma);
+
+	/* REQ5..REQ6 stay zero unless op->eom is set, in which case they
+	 * carry the 64-bit message bit count
+	 */
+	cr[4] = 0;
+	cr[5] = 0;
+	if (op->eom) {
+		cr[0] |= REQ1_EOM;
+		cr[4] = lower_32_bits(op->u.sha.msg_bits);
+		cr[5] = upper_32_bits(op->u.sha.msg_bits);
+	}
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+/* Build the REQ1..REQ6 register values for an RSA operation described
+ * by @op and hand them to ccp_do_cmd(); returns ccp_do_cmd()'s result.
+ */
+static int ccp_perform_rsa(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* REQ1: engine, modulus size and key KSB index; always end of message */
+	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT) |
+		(op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT) |
+		(op->sb_key << REQ1_KEY_KSB_SHIFT) |
+		REQ1_EOM;
+
+	/* REQ2..REQ4: input length minus one, source address, context KSB */
+	cr[1] = op->u.rsa.input_len - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (op->sb_ctx << REQ4_KSB_SHIFT) |
+		(CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->src.u.dma);
+
+	/* REQ5..REQ6: destination address */
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->dst.u.dma);
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+/* Build the REQ1..REQ6 register values for a pass-through operation
+ * described by @op and hand them to ccp_do_cmd(); returns
+ * ccp_do_cmd()'s result. Source and destination may each be system
+ * memory or a storage block (SB) entry.
+ */
+static int ccp_perform_passthru(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* REQ1: engine plus the bitwise and byte-swap controls */
+	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT) |
+		(op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT) |
+		(op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
+
+	/* REQ2..REQ4: the length comes from whichever side is system
+	 * memory on the source test; the source is either a DMA address
+	 * or an SB offset
+	 */
+	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
+		cr[1] = op->src.u.dma.length - 1;
+		cr[2] = ccp_addr_lo(&op->src.u.dma);
+		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) |
+			ccp_addr_hi(&op->src.u.dma);
+
+		/* The key KSB index is only added for a bitwise operation */
+		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+			cr[3] |= (op->sb_key << REQ4_KSB_SHIFT);
+	} else {
+		cr[1] = op->dst.u.dma.length - 1;
+		cr[2] = op->src.u.sb * CCP_SB_BYTES;
+		cr[3] = (CCP_MEMTYPE_SB << REQ4_MEMTYPE_SHIFT);
+	}
+
+	/* REQ5..REQ6: destination, again a DMA address or an SB offset */
+	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
+		cr[4] = ccp_addr_lo(&op->dst.u.dma);
+		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) |
+			ccp_addr_hi(&op->dst.u.dma);
+	} else {
+		cr[4] = op->dst.u.sb * CCP_SB_BYTES;
+		cr[5] = (CCP_MEMTYPE_SB << REQ6_MEMTYPE_SHIFT);
+	}
+
+	if (op->eom)
+		cr[0] |= REQ1_EOM;
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+/* Build the REQ1..REQ6 register values for an ECC operation described
+ * by @op and hand them to ccp_do_cmd(); returns ccp_do_cmd()'s result.
+ */
+static int ccp_perform_ecc(struct ccp_op *op)
+{
+	u32 cr[6];
+
+	/* REQ1: affine conversion, engine and ECC function; always EOM */
+	cr[0] = REQ1_ECC_AFFINE_CONVERT |
+		(CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT) |
+		(op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT) |
+		REQ1_EOM;
+
+	/* REQ2..REQ4: source length minus one and source address */
+	cr[1] = op->src.u.dma.length - 1;
+	cr[2] = ccp_addr_lo(&op->src.u.dma);
+	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->src.u.dma);
+
+	/* REQ5..REQ6: destination address */
+	cr[4] = ccp_addr_lo(&op->dst.u.dma);
+	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT) |
+		ccp_addr_hi(&op->dst.u.dma);
+
+	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
+}
+
+/* Write zero to the interrupt mask register of @ccp */
+static void ccp_disable_queue_interrupts(struct ccp_device *ccp)
+{
+	void __iomem *irq_mask = ccp->io_regs + IRQ_MASK_REG;
+
+	iowrite32(0x00, irq_mask);
+}
+
+/* Write the accumulated queue interrupt mask (ccp->qim) to the
+ * interrupt mask register of @ccp
+ */
+static void ccp_enable_queue_interrupts(struct ccp_device *ccp)
+{
+	void __iomem *irq_mask = ccp->io_regs + IRQ_MASK_REG;
+
+	iowrite32(ccp->qim, irq_mask);
+}
+
+/* Interrupt bottom half. @data is the struct ccp_device pointer cast to
+ * unsigned long.
+ *
+ * For every queue with an ok or error bit set in the IRQ status
+ * register: snapshot the status registers, latch the first error,
+ * flag the interrupt as received, acknowledge it and wake the waiter.
+ * The queue interrupts are re-enabled at the end.
+ */
+static void ccp_irq_bh(unsigned long data)
+{
+	struct ccp_device *ccp = (struct ccp_device *)data;
+	struct ccp_cmd_queue *cmd_q;
+	u32 q_int, status;
+	unsigned int i;
+
+	status = ioread32(ccp->io_regs + IRQ_STATUS_REG);
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		/* Nothing pending for this queue */
+		q_int = status & (cmd_q->int_ok | cmd_q->int_err);
+		if (!q_int)
+			continue;
+
+		cmd_q->int_status = status;
+		cmd_q->q_status = ioread32(cmd_q->reg_status);
+		cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
+
+		/* On error, only save the first error value */
+		if ((q_int & cmd_q->int_err) && !cmd_q->cmd_error)
+			cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+		cmd_q->int_rcvd = 1;
+
+		/* Acknowledge the interrupt and wake the kthread */
+		iowrite32(q_int, ccp->io_regs + IRQ_STATUS_REG);
+		wake_up_interruptible(&cmd_q->int_queue);
+	}
+
+	ccp_enable_queue_interrupts(ccp);
+}
+
+/* Top-half interrupt handler; @data is the struct ccp_device.
+ *
+ * Masks the queue interrupts, then either schedules the tasklet or
+ * runs the bottom half directly, depending on ccp->use_tasklet.
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ccp_irq_handler(int irq, void *data)
+{
+	struct ccp_device *ccp = data;
+
+	ccp_disable_queue_interrupts(ccp);
+
+	if (!ccp->use_tasklet)
+		ccp_irq_bh((unsigned long)ccp);
+	else
+		tasklet_schedule(&ccp->irq_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* Bring up a version 3 CCP.
+ *
+ * Steps: discover the hardware queues from Q_MASK_REG and set up a
+ * DMA pool, two reserved KSB entries and the register addresses for
+ * each; clear pending interrupts; request the IRQ; start one kthread
+ * per queue; enable interrupts; add the device to the unit list; and
+ * register the RNG and DMA engine.
+ *
+ * Returns 0 on success or a negative errno. On failure everything
+ * acquired so far is released again, including the unit list entry
+ * when the failure happens after ccp_add_device().
+ */
+static int ccp_init(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct ccp_cmd_queue *cmd_q;
+	struct dma_pool *dma_pool;
+	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+	unsigned int qmr, i;
+	int ret;
+
+	/* Find available queues */
+	ccp->qim = 0;
+	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+	for (i = 0; i < MAX_HW_QUEUES; i++) {
+		if (!(qmr & (1 << i)))
+			continue;
+
+		/* Allocate a dma pool for this queue */
+		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
+			 ccp->name, i);
+		dma_pool = dma_pool_create(dma_pool_name, dev,
+					   CCP_DMAPOOL_MAX_SIZE,
+					   CCP_DMAPOOL_ALIGN, 0);
+		if (!dma_pool) {
+			dev_err(dev, "unable to allocate dma pool\n");
+			ret = -ENOMEM;
+			goto e_pool;
+		}
+
+		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
+		ccp->cmd_q_count++;
+
+		cmd_q->ccp = ccp;
+		cmd_q->id = i;
+		cmd_q->dma_pool = dma_pool;
+
+		/* Reserve 2 KSB regions for the queue */
+		cmd_q->sb_key = KSB_START + ccp->sb_start++;
+		cmd_q->sb_ctx = KSB_START + ccp->sb_start++;
+		ccp->sb_count -= 2;
+
+		/* Preset some register values and masks that are queue
+		 * number dependent
+		 */
+		cmd_q->reg_status = ccp->io_regs + CMD_Q_STATUS_BASE +
+				    (CMD_Q_STATUS_INCR * i);
+		cmd_q->reg_int_status = ccp->io_regs + CMD_Q_INT_STATUS_BASE +
+					(CMD_Q_STATUS_INCR * i);
+		cmd_q->int_ok = 1 << (i * 2);
+		cmd_q->int_err = 1 << ((i * 2) + 1);
+
+		cmd_q->free_slots = ccp_get_free_slots(cmd_q);
+
+		init_waitqueue_head(&cmd_q->int_queue);
+
+		/* Build queue interrupt mask (two interrupts per queue) */
+		ccp->qim |= cmd_q->int_ok | cmd_q->int_err;
+
+#ifdef CONFIG_ARM64
+		/* For arm64 set the recommended queue cache settings */
+		iowrite32(ccp->axcache, ccp->io_regs + CMD_Q_CACHE_BASE +
+			  (CMD_Q_CACHE_INC * i));
+#endif
+
+		dev_dbg(dev, "queue #%u available\n", i);
+	}
+	if (ccp->cmd_q_count == 0) {
+		dev_notice(dev, "no command queues available\n");
+		ret = -EIO;
+		goto e_pool;
+	}
+	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
+
+	/* Disable and clear interrupts until ready */
+	ccp_disable_queue_interrupts(ccp);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+	iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);
+
+	/* Request an irq */
+	ret = sp_request_ccp_irq(ccp->sp, ccp_irq_handler, ccp->name, ccp);
+	if (ret) {
+		dev_err(dev, "unable to allocate an IRQ\n");
+		goto e_pool;
+	}
+
+	/* Initialize the ISR tasklet? */
+	if (ccp->use_tasklet)
+		tasklet_init(&ccp->irq_tasklet, ccp_irq_bh,
+			     (unsigned long)ccp);
+
+	dev_dbg(dev, "Starting threads...\n");
+	/* Create a kthread for each queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct task_struct *kthread;
+
+		cmd_q = &ccp->cmd_q[i];
+
+		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
+					 "%s-q%u", ccp->name, cmd_q->id);
+		if (IS_ERR(kthread)) {
+			dev_err(dev, "error creating queue thread (%ld)\n",
+				PTR_ERR(kthread));
+			ret = PTR_ERR(kthread);
+			goto e_kthread;
+		}
+
+		cmd_q->kthread = kthread;
+		wake_up_process(kthread);
+	}
+
+	dev_dbg(dev, "Enabling interrupts...\n");
+	/* Enable interrupts */
+	ccp_enable_queue_interrupts(ccp);
+
+	dev_dbg(dev, "Registering device...\n");
+	ccp_add_device(ccp);
+
+	/* From here on a failure must also undo ccp_add_device() */
+	ret = ccp_register_rng(ccp);
+	if (ret)
+		goto e_del_device;
+
+	/* Register the DMA engine support */
+	ret = ccp_dmaengine_register(ccp);
+	if (ret)
+		goto e_hwrng;
+
+	return 0;
+
+e_hwrng:
+	ccp_unregister_rng(ccp);
+
+e_del_device:
+	/* Take the half-initialized device back off the unit list and
+	 * mask its interrupts again, mirroring ccp_destroy()
+	 */
+	ccp_del_device(ccp);
+	ccp_disable_queue_interrupts(ccp);
+
+e_kthread:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	sp_free_ccp_irq(ccp->sp, ccp);
+
+e_pool:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
+
+	return ret;
+}
+
+/* Tear down a version 3 CCP: unregister the DMA engine and RNG, take
+ * the device off the unit list, quiesce interrupts, stop the queue
+ * kthreads, release the IRQ and DMA pools, and finally complete every
+ * command still queued with -ENODEV.
+ */
+static void ccp_destroy(struct ccp_device *ccp)
+{
+	struct ccp_cmd_queue *queue;
+	struct ccp_cmd *pending;
+	unsigned int n;
+
+	/* Drop the DMA engine and RNG registrations first */
+	ccp_dmaengine_unregister(ccp);
+	ccp_unregister_rng(ccp);
+
+	/* Remove this device from the list of available units */
+	ccp_del_device(ccp);
+
+	/* Disable and clear interrupts */
+	ccp_disable_queue_interrupts(ccp);
+	for (n = 0; n < ccp->cmd_q_count; n++) {
+		queue = &ccp->cmd_q[n];
+
+		ioread32(queue->reg_int_status);
+		ioread32(queue->reg_status);
+	}
+	iowrite32(ccp->qim, ccp->io_regs + IRQ_STATUS_REG);
+
+	/* Stop whichever queue kthreads exist */
+	for (n = 0; n < ccp->cmd_q_count; n++) {
+		queue = &ccp->cmd_q[n];
+
+		if (queue->kthread)
+			kthread_stop(queue->kthread);
+	}
+
+	sp_free_ccp_irq(ccp->sp, ccp);
+
+	for (n = 0; n < ccp->cmd_q_count; n++) {
+		queue = &ccp->cmd_q[n];
+
+		dma_pool_destroy(queue->dma_pool);
+	}
+
+	/* Drain the cmd list, then the backlog: every entry is unlinked
+	 * and its callback invoked directly with an error code
+	 */
+	while (!list_empty(&ccp->cmd)) {
+		pending = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&pending->entry);
+		pending->callback(pending->data, -ENODEV);
+	}
+	while (!list_empty(&ccp->backlog)) {
+		pending = list_first_entry(&ccp->backlog, struct ccp_cmd,
+					   entry);
+		list_del(&pending->entry);
+		pending->callback(pending->data, -ENODEV);
+	}
+}
+
+/* Operation table for version 3 devices; no 3DES handler is provided */
+static const struct ccp_actions ccp3_actions = {
+	/* Device lifecycle and interrupt handling */
+	.init = ccp_init,
+	.destroy = ccp_destroy,
+	.irqhandler = ccp_irq_handler,
+	.get_free_slots = ccp_get_free_slots,
+
+	/* Storage block allocation */
+	.sballoc = ccp_alloc_ksb,
+	.sbfree = ccp_free_ksb,
+
+	/* Engine operations */
+	.aes = ccp_perform_aes,
+	.xts_aes = ccp_perform_xts_aes,
+	.des3 = NULL,
+	.sha = ccp_perform_sha,
+	.rsa = ccp_perform_rsa,
+	.passthru = ccp_perform_passthru,
+	.ecc = ccp_perform_ecc,
+};
+
+/* Version 3.0 device data with a register offset of 0 */
+const struct ccp_vdata ccpv3_platform = {
+	.version = CCP_VERSION(3, 0),
+	.offset = 0,
+	.rsamax = CCP_RSA_MAX_WIDTH,
+	.perform = &ccp3_actions,
+	.setup = NULL,
+};
+
+/* Version 3.0 device data with a register offset of 0x20000 */
+const struct ccp_vdata ccpv3 = {
+	.version = CCP_VERSION(3, 0),
+	.offset = 0x20000,
+	.rsamax = CCP_RSA_MAX_WIDTH,
+	.perform = &ccp3_actions,
+	.setup = NULL,
+};
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
new file mode 100644
index 000000000..44a4d2779
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -0,0 +1,1126 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/kthread.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/compiler.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+/* Allocate the requested number of contiguous LSB slots
+ * from the LSB bitmap. Look in the private range for this
+ * queue first; failing that, check the public area.
+ * If no space is available, wait around.
+ *
+ * @cmd_q: queue requesting the slots
+ * @count: number of contiguous slots wanted
+ *
+ * Return: first slot number. A slot taken from the queue's private LSB is
+ * reported as an absolute index (private offset + cmd_q->lsb * LSB_SIZE).
+ * 0 is returned when the wait for a free shared slot is interrupted.
+ */
+static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
+{
+	struct ccp_device *ccp;
+	int start;
+
+	/* First look at the map for the queue */
+	if (cmd_q->lsb >= 0) {
+		/* The private map is searched without taking sb_mutex */
+		start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
+							LSB_SIZE,
+							0, count, 0);
+		if (start < LSB_SIZE) {
+			bitmap_set(cmd_q->lsbmap, start, count);
+			/* Translate the private offset to an absolute slot */
+			return start + cmd_q->lsb * LSB_SIZE;
+		}
+	}
+
+	/* No joy; try to get an entry from the shared blocks */
+	ccp = cmd_q->ccp;
+	for (;;) {
+		mutex_lock(&ccp->sb_mutex);
+
+		start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
+							MAX_LSB_CNT * LSB_SIZE,
+							0,
+							count, 0);
+		if (start <= MAX_LSB_CNT * LSB_SIZE) {
+			bitmap_set(ccp->lsbmap, start, count);
+
+			mutex_unlock(&ccp->sb_mutex);
+			return start;
+		}
+
+		/* Nothing free: clear the flag that ccp_lsb_free() sets */
+		ccp->sb_avail = 0;
+
+		mutex_unlock(&ccp->sb_mutex);
+
+		/* Wait for KSB entries to become available */
+		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
+			return 0;	/* interrupted: report "no slot" */
+	}
+}
+
+/* Free a number of LSB slots from the bitmap, starting at
+ * the indicated starting slot number.
+ *
+ * @cmd_q: queue the slots were allocated for
+ * @start: first slot, as returned by ccp_lsb_alloc(); 0 is ignored because
+ *         ccp_lsb_alloc() returns 0 when it could not allocate
+ * @count: number of slots to release
+ */
+static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
+			 unsigned int count)
+{
+	if (!start)
+		return;
+
+	/* ccp_lsb_alloc() reports a private slot as
+	 * "offset + cmd_q->lsb * LSB_SIZE", so the slot is private when it
+	 * lies inside this queue's LSB region, and the offset within the
+	 * private map is the remainder.
+	 */
+	if (cmd_q->lsb >= 0 &&
+	    start / LSB_SIZE == (unsigned int)cmd_q->lsb) {
+		/* An entry from the private LSB */
+		bitmap_clear(cmd_q->lsbmap, start % LSB_SIZE, count);
+	} else {
+		/* From the shared LSBs */
+		struct ccp_device *ccp = cmd_q->ccp;
+
+		mutex_lock(&ccp->sb_mutex);
+		bitmap_clear(ccp->lsbmap, start, count);
+		ccp->sb_avail = 1;
+		mutex_unlock(&ccp->sb_mutex);
+		/* Let every allocator blocked in ccp_lsb_alloc() retry */
+		wake_up_interruptible_all(&ccp->sb_queue);
+	}
+}
+
+/* CCP version 5: Union to define the function field (cmd_reg1/dword0)
+ *
+ * Each member struct gives the bit layout of the function field for one
+ * engine; "raw" is the 16-bit view that the submit routines copy into the
+ * descriptor once the per-engine fields have been filled in.
+ */
+union ccp_function {
+	struct {
+		u16 size:7;
+		u16 encrypt:1;
+		u16 mode:5;
+		u16 type:2;
+	} aes;
+	struct {
+		u16 size:7;
+		u16 encrypt:1;
+		u16 rsvd:5;
+		u16 type:2;
+	} aes_xts;
+	struct {
+		u16 size:7;
+		u16 encrypt:1;
+		u16 mode:5;
+		u16 type:2;
+	} des3;
+	struct {
+		u16 rsvd1:10;
+		u16 type:4;
+		u16 rsvd2:1;
+	} sha;
+	struct {
+		u16 mode:3;
+		u16 size:12;
+	} rsa;
+	struct {
+		u16 byteswap:2;
+		u16 bitwise:3;
+		u16 reflect:2;
+		u16 rsvd:8;
+	} pt;
+	struct  {
+		u16 rsvd:13;
+	} zlib;
+	struct {
+		u16 size:10;
+		u16 type:2;
+		u16 mode:3;
+	} ecc;
+	u16 raw;	/* whole field, as written to the descriptor */
+};
+
+/* Accessors for the per-engine bit-fields of union ccp_function;
+ * "p" is a pointer to the union.
+ */
+#define	CCP_AES_SIZE(p)		((p)->aes.size)
+#define	CCP_AES_ENCRYPT(p)	((p)->aes.encrypt)
+#define	CCP_AES_MODE(p)		((p)->aes.mode)
+#define	CCP_AES_TYPE(p)		((p)->aes.type)
+#define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
+#define	CCP_XTS_TYPE(p)		((p)->aes_xts.type)
+#define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
+#define	CCP_DES3_SIZE(p)	((p)->des3.size)
+#define	CCP_DES3_ENCRYPT(p)	((p)->des3.encrypt)
+#define	CCP_DES3_MODE(p)	((p)->des3.mode)
+#define	CCP_DES3_TYPE(p)	((p)->des3.type)
+#define	CCP_SHA_TYPE(p)		((p)->sha.type)
+#define	CCP_RSA_SIZE(p)		((p)->rsa.size)
+#define	CCP_PT_BYTESWAP(p)	((p)->pt.byteswap)
+#define	CCP_PT_BITWISE(p)	((p)->pt.bitwise)
+#define	CCP_ECC_MODE(p)		((p)->ecc.mode)
+/* NOTE(review): union ccp_function has no "ecc.one" member, so any use of
+ * CCP_ECC_AFFINE() would fail to compile; no use is visible in this file.
+ */
+#define	CCP_ECC_AFFINE(p)	((p)->ecc.one)
+
+/* Accessors for the fields of a v5 command descriptor, grouped by the
+ * descriptor word they belong to; "p" is a pointer to struct ccp5_desc.
+ * All of them expand to lvalues so they can be assigned to directly.
+ */
+
+/* Word 0 */
+#define CCP5_CMD_DW0(p)		((p)->dw0)
+#define CCP5_CMD_SOC(p)		(CCP5_CMD_DW0(p).soc)
+#define CCP5_CMD_IOC(p)		(CCP5_CMD_DW0(p).ioc)
+#define CCP5_CMD_INIT(p)	(CCP5_CMD_DW0(p).init)
+#define CCP5_CMD_EOM(p)		(CCP5_CMD_DW0(p).eom)
+#define CCP5_CMD_FUNCTION(p)	(CCP5_CMD_DW0(p).function)
+#define CCP5_CMD_ENGINE(p)	(CCP5_CMD_DW0(p).engine)
+#define CCP5_CMD_PROT(p)	(CCP5_CMD_DW0(p).prot)
+
+/* Word 1 */
+#define CCP5_CMD_DW1(p)		((p)->length)
+#define CCP5_CMD_LEN(p)		(CCP5_CMD_DW1(p))
+
+/* Word 2 */
+#define CCP5_CMD_DW2(p)		((p)->src_lo)
+#define CCP5_CMD_SRC_LO(p)	(CCP5_CMD_DW2(p))
+
+/* Word 3 */
+#define CCP5_CMD_DW3(p)		((p)->dw3)
+#define CCP5_CMD_SRC_MEM(p)	((p)->dw3.src_mem)
+#define CCP5_CMD_SRC_HI(p)	((p)->dw3.src_hi)
+#define CCP5_CMD_LSB_ID(p)	((p)->dw3.lsb_cxt_id)
+#define CCP5_CMD_FIX_SRC(p)	((p)->dw3.fixed)
+
+/* Words 4/5 (destination address, or the SHA message length) */
+#define CCP5_CMD_DW4(p)		((p)->dw4)
+#define CCP5_CMD_DST_LO(p)	(CCP5_CMD_DW4(p).dst_lo)
+#define CCP5_CMD_DW5(p)		((p)->dw5.fields.dst_hi)
+#define CCP5_CMD_DST_HI(p)	(CCP5_CMD_DW5(p))
+#define CCP5_CMD_DST_MEM(p)	((p)->dw5.fields.dst_mem)
+#define CCP5_CMD_FIX_DST(p)	((p)->dw5.fields.fixed)
+#define CCP5_CMD_SHA_LO(p)	((p)->dw4.sha_len_lo)
+#define CCP5_CMD_SHA_HI(p)	((p)->dw5.sha_len_hi)
+
+/* Word 6/7 */
+#define CCP5_CMD_DW6(p)		((p)->key_lo)
+#define CCP5_CMD_KEY_LO(p)	(CCP5_CMD_DW6(p))
+#define CCP5_CMD_DW7(p)		((p)->dw7)
+#define CCP5_CMD_KEY_HI(p)	((p)->dw7.key_hi)
+#define CCP5_CMD_KEY_MEM(p)	((p)->dw7.key_mem)
+
+/* Return bits 31..0 of @addr. */
+static inline u32 low_address(unsigned long addr)
+{
+	const u64 wide = addr;
+
+	return wide & 0x0ffffffff;
+}
+
+/* Return bits 47..32 of @addr, i.e. the 16 address bits above the low
+ * 32-bit word.
+ */
+static inline u32 high_address(unsigned long addr)
+{
+	const u64 upper = (u64)addr >> 32;
+
+	return upper & 0x00000ffff;
+}
+
+/* Report how many descriptors can still be queued on @cmd_q.
+ *
+ * The hardware head pointer is read back and converted to a ring index;
+ * the distance from the software index (qidx) to it, minus one, is the
+ * free space.
+ */
+static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
+{
+	u32 ring_base = low_address(cmd_q->qdma_tail);
+	u32 hw_head = ioread32(cmd_q->reg_head_lo);
+	unsigned int head_slot;
+	unsigned int avail;
+
+	head_slot = (hw_head - ring_base) / sizeof(struct ccp5_desc);
+	avail = head_slot + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
+
+	/* Always one unused spot */
+	return avail % COMMANDS_PER_QUEUE;
+}
+
+/* Copy one descriptor into the next ring slot of @cmd_q, advance the tail
+ * register and start the queue. If the descriptor asks for an interrupt on
+ * completion, sleep until the interrupt bottom half flags the queue.
+ *
+ * @desc:  descriptor to submit; a set stop-on-completion bit is converted
+ *         to interrupt-on-completion before the descriptor is copied
+ * @cmd_q: queue to submit on
+ *
+ * Return: 0 on success, the non-zero wait_event_interruptible() result if
+ * the sleep was interrupted, or -EIO if the queue recorded a command error.
+ */
+static int ccp5_do_cmd(struct ccp5_desc *desc,
+		       struct ccp_cmd_queue *cmd_q)
+{
+	u32 *mP;
+	__le32 *dP;
+	u32 tail;
+	int	i;
+	int ret = 0;
+
+	/* Statistics counter; bumped before q_mutex is taken */
+	cmd_q->total_ops++;
+
+	if (CCP5_CMD_SOC(desc)) {
+		CCP5_CMD_IOC(desc) = 1;
+		CCP5_CMD_SOC(desc) = 0;
+	}
+	mutex_lock(&cmd_q->q_mutex);
+
+	/* The descriptor is copied into the ring as eight 32-bit words */
+	mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
+	dP = (__le32 *) desc;
+	for (i = 0; i < 8; i++)
+		mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
+
+	cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
+
+	/* The data used by this command must be flushed to memory */
+	wmb();
+
+	/* Write the new tail address back to the queue register */
+	tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+	iowrite32(tail, cmd_q->reg_tail_lo);
+
+	/* Turn the queue back on using our cached control register */
+	iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
+	mutex_unlock(&cmd_q->q_mutex);
+
+	if (CCP5_CMD_IOC(desc)) {
+		/* Wait for the job to complete */
+		ret = wait_event_interruptible(cmd_q->int_queue,
+					       cmd_q->int_rcvd);
+		if (ret || cmd_q->cmd_error) {
+			/* Log the error and flush the queue by
+			 * moving the head pointer
+			 */
+			if (cmd_q->cmd_error)
+				ccp_log_error(cmd_q->ccp,
+					      cmd_q->cmd_error);
+			iowrite32(tail, cmd_q->reg_head_lo);
+			/* An interrupted wait keeps its own error code */
+			if (!ret)
+				ret = -EIO;
+		}
+		/* Re-arm the completion flag for the next submission */
+		cmd_q->int_rcvd = 0;
+	}
+
+	return ret;
+}
+
+/* Build and submit a v5 descriptor for an AES operation.
+ *
+ * Source and destination are system-memory DMA buffers; the key is read
+ * from the storage block at slot op->sb_key and the context lives in slot
+ * op->sb_ctx.
+ *
+ * Return: result of ccp5_do_cmd().
+ */
+static int ccp5_perform_aes(struct ccp_op *op)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_dma_info *src = &op->src.u.dma;
+	struct ccp_dma_info *dst = &op->dst.u.dma;
+	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+	union ccp_function fn;
+	struct ccp5_desc desc;
+
+	cmd_q->total_aes_ops++;
+
+	/* Engine-specific function word */
+	fn.raw = 0;
+	CCP_AES_SIZE(&fn) = op->u.aes.size;
+	CCP_AES_TYPE(&fn) = op->u.aes.type;
+	CCP_AES_MODE(&fn) = op->u.aes.mode;
+	CCP_AES_ENCRYPT(&fn) = op->u.aes.action;
+
+	/* Start from an all-zero descriptor */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	/* Control word */
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
+	CCP5_CMD_FUNCTION(&desc) = fn.raw;
+	CCP5_CMD_PROT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_INIT(&desc) = op->init;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_SOC(&desc) = op->soc;
+
+	/* Source buffer */
+	CCP5_CMD_LEN(&desc) = src->length;
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(src);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(src);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Destination buffer */
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(dst);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(dst);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Key and context come from the storage block */
+	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+	CCP5_CMD_KEY_HI(&desc) = 0;
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+	return ccp5_do_cmd(&desc, cmd_q);
+}
+
+/* Build and submit a v5 descriptor for an XTS-AES operation.
+ *
+ * Source and destination are system-memory DMA buffers; the key is read
+ * from the storage block at slot op->sb_key and the context lives in slot
+ * op->sb_ctx.
+ *
+ * Return: result of ccp5_do_cmd().
+ */
+static int ccp5_perform_xts_aes(struct ccp_op *op)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_dma_info *src = &op->src.u.dma;
+	struct ccp_dma_info *dst = &op->dst.u.dma;
+	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+	union ccp_function fn;
+	struct ccp5_desc desc;
+
+	cmd_q->total_xts_aes_ops++;
+
+	/* Engine-specific function word */
+	fn.raw = 0;
+	CCP_XTS_SIZE(&fn) = op->u.xts.unit_size;
+	CCP_XTS_ENCRYPT(&fn) = op->u.xts.action;
+	CCP_XTS_TYPE(&fn) = op->u.xts.type;
+
+	/* Start from an all-zero descriptor */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	/* Control word */
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
+	CCP5_CMD_FUNCTION(&desc) = fn.raw;
+	CCP5_CMD_PROT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_INIT(&desc) = op->init;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_SOC(&desc) = op->soc;
+
+	/* Source buffer */
+	CCP5_CMD_LEN(&desc) = src->length;
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(src);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(src);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Destination buffer */
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(dst);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(dst);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Key and context come from the storage block */
+	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+	CCP5_CMD_KEY_HI(&desc) =  0;
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+	return ccp5_do_cmd(&desc, cmd_q);
+}
+
+/* Build and submit a v5 descriptor for a SHA operation.
+ *
+ * The source is a system-memory DMA buffer and the hash context lives in
+ * storage-block slot op->sb_ctx. The total message length in bits is only
+ * supplied on the final (end-of-message) block; otherwise it is zero.
+ * The INIT bit is always set, independent of op->init.
+ *
+ * Return: result of ccp5_do_cmd().
+ */
+static int ccp5_perform_sha(struct ccp_op *op)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_dma_info *src = &op->src.u.dma;
+	union ccp_function fn;
+	struct ccp5_desc desc;
+
+	cmd_q->total_sha_ops++;
+
+	/* Engine-specific function word */
+	fn.raw = 0;
+	CCP_SHA_TYPE(&fn) = op->u.sha.type;
+
+	/* Start from an all-zero descriptor */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	/* Control word */
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
+	CCP5_CMD_FUNCTION(&desc) = fn.raw;
+	CCP5_CMD_PROT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_INIT(&desc) = 1;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_SOC(&desc) = op->soc;
+
+	/* Source buffer and context slot */
+	CCP5_CMD_LEN(&desc) = src->length;
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(src);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(src);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+	/* Message length, final block only */
+	if (!op->eom) {
+		CCP5_CMD_SHA_LO(&desc) = 0;
+		CCP5_CMD_SHA_HI(&desc) = 0;
+	} else {
+		CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
+		CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
+	}
+
+	return ccp5_do_cmd(&desc, cmd_q);
+}
+
+/* Build and submit a v5 descriptor for a 3DES operation.
+ *
+ * Source and destination are system-memory DMA buffers; the key is read
+ * from the storage block at slot op->sb_key and the context lives in slot
+ * op->sb_ctx.
+ *
+ * Return: result of ccp5_do_cmd().
+ */
+static int ccp5_perform_des3(struct ccp_op *op)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_dma_info *src = &op->src.u.dma;
+	struct ccp_dma_info *dst = &op->dst.u.dma;
+	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
+	union ccp_function fn;
+	struct ccp5_desc desc;
+
+	cmd_q->total_3des_ops++;
+
+	/* Engine-specific function word */
+	fn.raw = 0;
+	CCP_DES3_TYPE(&fn) = op->u.des3.type;
+	CCP_DES3_MODE(&fn) = op->u.des3.mode;
+	CCP_DES3_ENCRYPT(&fn) = op->u.des3.action;
+
+	/* Start from an all-zero descriptor */
+	memset(&desc, 0, sizeof(struct ccp5_desc));
+
+	/* Control word */
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_DES3;
+	CCP5_CMD_FUNCTION(&desc) = fn.raw;
+	CCP5_CMD_PROT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_INIT(&desc) = op->init;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_SOC(&desc) = op->soc;
+
+	/* Source buffer */
+	CCP5_CMD_LEN(&desc) = src->length;
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(src);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(src);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Destination buffer */
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(dst);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(dst);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Key and context come from the storage block */
+	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
+	CCP5_CMD_KEY_HI(&desc) = 0;
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
+	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
+
+	return ccp5_do_cmd(&desc, cmd_q);
+}
+
+/* Build and submit a v5 descriptor for an RSA operation.
+ *
+ * Source, destination and the exponent (passed as the "key") are all
+ * system-memory DMA buffers. The size field of the function word is the
+ * modulus size rounded up to whole bytes.
+ *
+ * Return: result of ccp5_do_cmd().
+ */
+static int ccp5_perform_rsa(struct ccp_op *op)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_dma_info *src = &op->src.u.dma;
+	struct ccp_dma_info *dst = &op->dst.u.dma;
+	struct ccp_dma_info *exp = &op->exp.u.dma;
+	union ccp_function fn;
+	struct ccp5_desc desc;
+
+	cmd_q->total_rsa_ops++;
+
+	/* Engine-specific function word */
+	fn.raw = 0;
+	CCP_RSA_SIZE(&fn) = (op->u.rsa.mod_size + 7) >> 3;
+
+	/* Start from an all-zero descriptor */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	/* Control word */
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
+	CCP5_CMD_FUNCTION(&desc) = fn.raw;
+	CCP5_CMD_PROT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = 1;
+	CCP5_CMD_INIT(&desc) = 0;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_SOC(&desc) = op->soc;
+
+	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
+
+	/* Source is from external memory */
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(src);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(src);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Destination is in external memory */
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(dst);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(dst);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Key (Exponent) is in external memory */
+	CCP5_CMD_KEY_LO(&desc) = ccp_addr_lo(exp);
+	CCP5_CMD_KEY_HI(&desc) = ccp_addr_hi(exp);
+	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	return ccp5_do_cmd(&desc, cmd_q);
+}
+
+/* Build and submit a v5 descriptor for a pass-through (copy) operation.
+ *
+ * Either end of the copy may be system memory or a storage-block slot.
+ * The transfer length is taken from whichever side is the system-memory
+ * source; when the source is a storage block the destination length is
+ * used instead.
+ *
+ * Return: result of ccp5_do_cmd().
+ */
+static int ccp5_perform_passthru(struct ccp_op *op)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_dma_info *saddr = &op->src.u.dma;
+	struct ccp_dma_info *daddr = &op->dst.u.dma;
+	union ccp_function fn;
+	struct ccp5_desc desc;
+
+	cmd_q->total_pt_ops++;
+
+	/* Engine-specific function word */
+	fn.raw = 0;
+	CCP_PT_BITWISE(&fn) = op->u.passthru.bit_mod;
+	CCP_PT_BYTESWAP(&fn) = op->u.passthru.byte_swap;
+
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	/* Control word */
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
+	CCP5_CMD_FUNCTION(&desc) = fn.raw;
+	CCP5_CMD_PROT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = op->eom;
+	CCP5_CMD_INIT(&desc) = 0;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_SOC(&desc) = 0;
+
+	/* Source side, which also determines the length */
+	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
+		CCP5_CMD_LEN(&desc) = saddr->length;
+
+		CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(saddr);
+		CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(saddr);
+		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+		/* A bitwise operation takes its mask from slot sb_key */
+		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
+			CCP5_CMD_LSB_ID(&desc) = op->sb_key;
+	} else {
+		u32 sb_src = op->src.u.sb * CCP_SB_BYTES;
+
+		CCP5_CMD_LEN(&desc) = daddr->length;
+
+		CCP5_CMD_SRC_LO(&desc) = lower_32_bits(sb_src);
+		CCP5_CMD_SRC_HI(&desc) = 0;
+		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
+	}
+
+	/* Destination side */
+	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
+		CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(daddr);
+		CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(daddr);
+		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+	} else {
+		u32 sb_dst = op->dst.u.sb * CCP_SB_BYTES;
+
+		CCP5_CMD_DST_LO(&desc) = lower_32_bits(sb_dst);
+		CCP5_CMD_DST_HI(&desc) = 0;
+		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
+	}
+
+	return ccp5_do_cmd(&desc, cmd_q);
+}
+
+/* Build and submit a v5 descriptor for an ECC operation.
+ *
+ * Source and destination are system-memory DMA buffers; the requested ECC
+ * function is carried in the mode bits of the function word.
+ *
+ * Return: result of ccp5_do_cmd().
+ */
+static int ccp5_perform_ecc(struct ccp_op *op)
+{
+	struct ccp_cmd_queue *cmd_q = op->cmd_q;
+	struct ccp_dma_info *src = &op->src.u.dma;
+	struct ccp_dma_info *dst = &op->dst.u.dma;
+	union ccp_function fn;
+	struct ccp5_desc desc;
+
+	cmd_q->total_ecc_ops++;
+
+	/* Engine-specific function word */
+	fn.raw = 0;
+	CCP_ECC_MODE(&fn) = op->u.ecc.function;
+
+	/* Start from an all-zero descriptor */
+	memset(&desc, 0, Q_DESC_SIZE);
+
+	/* Control word */
+	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
+	CCP5_CMD_FUNCTION(&desc) = fn.raw;
+	CCP5_CMD_PROT(&desc) = 0;
+	CCP5_CMD_EOM(&desc) = 1;
+	CCP5_CMD_INIT(&desc) = 0;
+	CCP5_CMD_IOC(&desc) = 1;
+	CCP5_CMD_SOC(&desc) = 0;
+
+	/* Source buffer */
+	CCP5_CMD_LEN(&desc) = src->length;
+	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(src);
+	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(src);
+	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	/* Destination buffer */
+	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(dst);
+	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(dst);
+	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
+
+	return ccp5_do_cmd(&desc, cmd_q);
+}
+
+/* Record in cmd_q->lsbmask which LSB regions the queue may access.
+ *
+ * @cmd_q:  queue being examined
+ * @status: access mask, LSB_REGION_WIDTH bits per region; within each
+ *          group, bit cmd_q->id is tested
+ *
+ * Return: 0 if at least one region is accessible, -EINVAL otherwise.
+ */
+static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
+{
+	int q_mask = 1 << cmd_q->id;
+	int accessible;
+	int region;
+
+	/* Build a bit mask to know which LSBs this queue has access to.
+	 * Don't bother with segment 0 as it has special privileges.
+	 */
+	for (region = 1; region < MAX_LSB_CNT; region++) {
+		if (status & q_mask)
+			bitmap_set(cmd_q->lsbmask, region, 1);
+		/* Move on to the next region's group of bits */
+		status >>= LSB_REGION_WIDTH;
+	}
+
+	accessible = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
+	dev_dbg(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
+		 cmd_q->id, accessible);
+
+	if (!accessible)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Give a private LSB to every queue that can reach exactly @lsb_cnt
+ * regions.
+ *
+ * @ccp:     device whose queues are examined
+ * @lsb_cnt: only queues whose lsbmask has this many bits set are handled
+ * @n_lsbs:  number of LSBs still unassigned on entry
+ * @lsb_pub: aggregate mask of still-available LSBs; a bit is cleared when
+ *           that LSB is handed to a queue
+ *
+ * Return: the number of LSBs still unassigned, or -EINVAL if a matching
+ * queue has no available LSB left among the ones it can access.
+ */
+static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
+					int lsb_cnt, int n_lsbs,
+					unsigned long *lsb_pub)
+{
+	DECLARE_BITMAP(candidates, MAX_LSB_CNT);
+	int bitno;
+	int i;
+
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
+
+		/* Only queues at the current constraint level take part */
+		if (bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT) != lsb_cnt)
+			continue;
+
+		/* Work on a copy so tried candidates can be crossed off */
+		bitmap_copy(candidates, cmd_q->lsbmask, MAX_LSB_CNT);
+
+		for (bitno = find_first_bit(candidates, MAX_LSB_CNT);
+		     bitno < MAX_LSB_CNT;
+		     bitno = find_first_bit(candidates, MAX_LSB_CNT)) {
+			if (test_bit(bitno, lsb_pub)) {
+				/* We found an available LSB
+				 * that this queue can access
+				 */
+				cmd_q->lsb = bitno;
+				bitmap_clear(lsb_pub, bitno, 1);
+				dev_dbg(ccp->dev,
+					 "Queue %d gets LSB %d\n",
+					 i, bitno);
+				break;
+			}
+			/* Already taken; drop it and try the next one */
+			bitmap_clear(candidates, bitno, 1);
+		}
+
+		/* Ran out of candidates without a match */
+		if (bitno >= MAX_LSB_CNT)
+			return -EINVAL;
+
+		n_lsbs--;
+	}
+
+	return n_lsbs;
+}
+
+/* Distribute the LSB regions among the queues of @ccp.
+ *
+ * Queues are served from the most- to the least-constrained: each gets a
+ * dedicated LSB it can access. If N queues can only use M LSBs with N > M,
+ * the assignment fails. LSBs left over afterwards become a shared resource.
+ * When there are fewer LSBs than queues, no private assignment is made and
+ * all LSB regions are shared.
+ *
+ * Return: 0 on success, -EINVAL if a queue could not be given an LSB.
+ */
+static int ccp_assign_lsbs(struct ccp_device *ccp)
+{
+	DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
+	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
+	int lsb_cnt;
+	int n_lsbs;
+	int bitno;
+	int rc;
+	int i;
+
+	/* Create an aggregate bitmap to get a total count of available LSBs */
+	bitmap_zero(lsb_pub, MAX_LSB_CNT);
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		bitmap_or(lsb_pub, lsb_pub, ccp->cmd_q[i].lsbmask,
+			  MAX_LSB_CNT);
+
+	n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
+
+	if (n_lsbs >= ccp->cmd_q_count) {
+		/* We have enough LSBS to give every queue a private LSB.
+		 * Brute force search to start with the queues that are more
+		 * constrained in LSB choice. When an LSB is privately
+		 * assigned, it is removed from the public mask.
+		 * This is an ugly N squared algorithm with some optimization.
+		 */
+		lsb_cnt = 1;
+		while (n_lsbs && (lsb_cnt <= MAX_LSB_CNT)) {
+			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
+							  lsb_pub);
+			if (rc < 0)
+				return -EINVAL;
+			n_lsbs = rc;
+			lsb_cnt++;
+		}
+	}
+
+	/* What's left of the LSBs, according to the public mask, now become
+	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
+	 * that can't be used as a shared resource, so mark the LSB slots for
+	 * them as "in use".
+	 */
+	bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
+
+	for (bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
+	     bitno < MAX_LSB_CNT;
+	     bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT)) {
+		bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
+		/* Mark the region as handled so the search advances */
+		bitmap_set(qlsb, bitno, 1);
+	}
+
+	return 0;
+}
+
+/* Write 0 to the interrupt-enable register of every command queue. */
+static void ccp5_disable_queue_interrupts(struct ccp_device *ccp)
+{
+	unsigned int q;
+
+	for (q = 0; q < ccp->cmd_q_count; q++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[q];
+
+		iowrite32(0x0, cmd_q->reg_int_enable);
+	}
+}
+
+/* Write SUPPORTED_INTERRUPTS to the interrupt-enable register of every
+ * command queue.
+ */
+static void ccp5_enable_queue_interrupts(struct ccp_device *ccp)
+{
+	unsigned int q;
+
+	for (q = 0; q < ccp->cmd_q_count; q++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[q];
+
+		iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_int_enable);
+	}
+}
+
+/* Interrupt bottom half: service every queue with a pending interrupt.
+ *
+ * For each such queue the status registers are captured, the first error
+ * code is latched, the waiter in ccp5_do_cmd() is woken and the interrupt
+ * is acknowledged. Queue interrupts are re-enabled before returning.
+ *
+ * @data: the struct ccp_device pointer, cast to unsigned long
+ */
+static void ccp5_irq_bh(unsigned long data)
+{
+	struct ccp_device *ccp = (struct ccp_device *)data;
+	unsigned int q;
+
+	for (q = 0; q < ccp->cmd_q_count; q++) {
+		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[q];
+		u32 pending = ioread32(cmd_q->reg_interrupt_status);
+
+		/* Nothing raised on this queue */
+		if (!pending)
+			continue;
+
+		cmd_q->int_status = pending;
+		cmd_q->q_status = ioread32(cmd_q->reg_status);
+		cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
+
+		/* On error, only save the first error value */
+		if (!cmd_q->cmd_error && (pending & INT_ERROR))
+			cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+		cmd_q->int_rcvd = 1;
+
+		/* Acknowledge the interrupt and wake the kthread */
+		iowrite32(pending, cmd_q->reg_interrupt_status);
+		wake_up_interruptible(&cmd_q->int_queue);
+	}
+
+	ccp5_enable_queue_interrupts(ccp);
+}
+
+/* Top-half interrupt handler for a v5 CCP.
+ *
+ * Queue interrupts are masked and the interrupt counter is bumped; the
+ * real work is done by ccp5_irq_bh(), run from the tasklet when the device
+ * uses one and called directly otherwise.
+ *
+ * Return: always IRQ_HANDLED.
+ */
+static irqreturn_t ccp5_irq_handler(int irq, void *data)
+{
+	struct ccp_device *ccp = data;
+
+	ccp5_disable_queue_interrupts(ccp);
+	ccp->total_interrupts++;
+
+	if (!ccp->use_tasklet)
+		ccp5_irq_bh((unsigned long)ccp);
+	else
+		tasklet_schedule(&ccp->irq_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/* Bring up a version 5 CCP device.
+ *
+ * For every hardware queue advertised in Q_MASK_REG a DMA pool and a
+ * descriptor ring are allocated and the per-queue register addresses are
+ * computed. The queues are then quiesced, the IRQ is requested, the LSB
+ * regions are distributed, one kthread per queue is started, interrupts are
+ * enabled and the device is registered (device list, RNG, DMA engine,
+ * debugfs).
+ *
+ * Return: 0 on success or a negative errno. On failure the kthreads, the
+ * IRQ, the DMA pools and the descriptor rings acquired so far are released.
+ */
+static int ccp5_init(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct ccp_cmd_queue *cmd_q;
+	struct dma_pool *dma_pool;
+	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+	unsigned int qmr, i;
+	u64 status;
+	u32 status_lo, status_hi;
+	int ret;
+
+	/* Find available queues */
+	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
+	for (i = 0; i < MAX_HW_QUEUES; i++) {
+
+		if (!(qmr & (1 << i)))
+			continue;
+
+		/* Allocate a dma pool for this queue */
+		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
+			 ccp->name, i);
+		dma_pool = dma_pool_create(dma_pool_name, dev,
+					   CCP_DMAPOOL_MAX_SIZE,
+					   CCP_DMAPOOL_ALIGN, 0);
+		if (!dma_pool) {
+			dev_err(dev, "unable to allocate dma pool\n");
+			ret = -ENOMEM;
+			/* Bail out instead of running on with a NULL pool */
+			goto e_pool;
+		}
+
+		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
+		ccp->cmd_q_count++;
+
+		cmd_q->ccp = ccp;
+		cmd_q->id = i;
+		cmd_q->dma_pool = dma_pool;
+		mutex_init(&cmd_q->q_mutex);
+
+		/* Page alignment satisfies our needs for N <= 128 */
+		BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
+		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
+		cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
+						   &cmd_q->qbase_dma,
+						   GFP_KERNEL);
+		if (!cmd_q->qbase) {
+			dev_err(dev, "unable to allocate command queue\n");
+			ret = -ENOMEM;
+			goto e_pool;
+		}
+
+		cmd_q->qidx = 0;
+		/* Preset some register values and masks that are queue
+		 * number dependent
+		 */
+		cmd_q->reg_control = ccp->io_regs +
+				     CMD5_Q_STATUS_INCR * (i + 1);
+		cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
+		cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
+		cmd_q->reg_int_enable = cmd_q->reg_control +
+					CMD5_Q_INT_ENABLE_BASE;
+		cmd_q->reg_interrupt_status = cmd_q->reg_control +
+					      CMD5_Q_INTERRUPT_STATUS_BASE;
+		cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
+		cmd_q->reg_int_status = cmd_q->reg_control +
+					CMD5_Q_INT_STATUS_BASE;
+		cmd_q->reg_dma_status = cmd_q->reg_control +
+					CMD5_Q_DMA_STATUS_BASE;
+		cmd_q->reg_dma_read_status = cmd_q->reg_control +
+					     CMD5_Q_DMA_READ_STATUS_BASE;
+		cmd_q->reg_dma_write_status = cmd_q->reg_control +
+					      CMD5_Q_DMA_WRITE_STATUS_BASE;
+
+		init_waitqueue_head(&cmd_q->int_queue);
+
+		dev_dbg(dev, "queue #%u available\n", i);
+	}
+
+	if (ccp->cmd_q_count == 0) {
+		dev_notice(dev, "no command queues available\n");
+		ret = -EIO;
+		goto e_pool;
+	}
+
+	/* Turn off the queues and disable interrupts until ready */
+	ccp5_disable_queue_interrupts(ccp);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		cmd_q->qcontrol = 0; /* Start with nothing */
+		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+
+		/* Clear the interrupt status */
+		iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status);
+	}
+
+	dev_dbg(dev, "Requesting an IRQ...\n");
+	/* Request an irq */
+	ret = sp_request_ccp_irq(ccp->sp, ccp5_irq_handler, ccp->name, ccp);
+	if (ret) {
+		dev_err(dev, "unable to allocate an IRQ\n");
+		goto e_pool;
+	}
+	/* Initialize the ISR tasklet */
+	if (ccp->use_tasklet)
+		tasklet_init(&ccp->irq_tasklet, ccp5_irq_bh,
+			     (unsigned long)ccp);
+
+	dev_dbg(dev, "Loading LSB map...\n");
+	/* Copy the private LSB mask to the public registers */
+	status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
+	status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
+	iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
+	iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
+	/* The high word is placed directly above bit 29 of the low word */
+	status = ((u64)status_hi<<30) | (u64)status_lo;
+
+	dev_dbg(dev, "Configuring virtual queues...\n");
+	/* Configure size of each virtual queue accessible to host */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		u32 dma_addr_lo;
+		u32 dma_addr_hi;
+
+		cmd_q = &ccp->cmd_q[i];
+
+		cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
+		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
+
+		cmd_q->qdma_tail = cmd_q->qbase_dma;
+		dma_addr_lo = low_address(cmd_q->qdma_tail);
+		iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
+		iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
+
+		dma_addr_hi = high_address(cmd_q->qdma_tail);
+		cmd_q->qcontrol |= (dma_addr_hi << 16);
+		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+		/* Find the LSB regions accessible to the queue */
+		ccp_find_lsb_regions(cmd_q, status);
+		cmd_q->lsb = -1; /* Unassigned value */
+	}
+
+	dev_dbg(dev, "Assigning LSBs...\n");
+	ret = ccp_assign_lsbs(ccp);
+	if (ret) {
+		dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
+		goto e_irq;
+	}
+
+	/* Optimization: pre-allocate LSB slots for each queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
+		ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
+	}
+
+	dev_dbg(dev, "Starting threads...\n");
+	/* Create a kthread for each queue */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		struct task_struct *kthread;
+
+		cmd_q = &ccp->cmd_q[i];
+
+		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
+					 "%s-q%u", ccp->name, cmd_q->id);
+		if (IS_ERR(kthread)) {
+			dev_err(dev, "error creating queue thread (%ld)\n",
+				PTR_ERR(kthread));
+			ret = PTR_ERR(kthread);
+			goto e_kthread;
+		}
+
+		cmd_q->kthread = kthread;
+		wake_up_process(kthread);
+	}
+
+	dev_dbg(dev, "Enabling interrupts...\n");
+	ccp5_enable_queue_interrupts(ccp);
+
+	dev_dbg(dev, "Registering device...\n");
+	/* Put this on the unit list to make it available */
+	ccp_add_device(ccp);
+
+	/* NOTE(review): if either registration below fails, the device is
+	 * left on the unit list and queue interrupts stay enabled; confirm
+	 * whether the caller undoes this.
+	 */
+	ret = ccp_register_rng(ccp);
+	if (ret)
+		goto e_kthread;
+
+	/* Register the DMA engine support */
+	ret = ccp_dmaengine_register(ccp);
+	if (ret)
+		goto e_hwrng;
+
+	/* Set up debugfs entries */
+	ccp5_debugfs_setup(ccp);
+
+	return 0;
+
+e_hwrng:
+	ccp_unregister_rng(ccp);
+
+e_kthread:
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+e_irq:
+	sp_free_ccp_irq(ccp->sp, ccp);
+
+e_pool:
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		/* qbase is NULL for the queue whose ring allocation failed */
+		if (cmd_q->qbase) {
+			dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
+					  cmd_q->qbase_dma);
+			cmd_q->qbase = NULL;
+		}
+		dma_pool_destroy(cmd_q->dma_pool);
+	}
+
+	return ret;
+}
+
+/* Tear down a version 5 CCP device.
+ *
+ * Unregisters the DMA engine and RNG, removes the device from the unit
+ * list, quiesces the queues, stops the queue kthreads, releases the IRQ,
+ * frees the per-queue descriptor rings and DMA pools, and finally fails
+ * every command still on the cmd and backlog lists with -ENODEV.
+ */
+static void ccp5_destroy(struct ccp_device *ccp)
+{
+	struct device *dev = ccp->dev;
+	struct ccp_cmd_queue *cmd_q;
+	struct ccp_cmd *cmd;
+	unsigned int i;
+
+	/* Unregister the DMA engine */
+	ccp_dmaengine_unregister(ccp);
+
+	/* Unregister the RNG */
+	ccp_unregister_rng(ccp);
+
+	/* Remove this device from the list of available units first */
+	ccp_del_device(ccp);
+
+	/* We're in the process of tearing down the entire driver;
+	 * when all the devices are gone clean up debugfs
+	 */
+	if (ccp_present())
+		ccp5_debugfs_destroy();
+
+	/* Disable and clear interrupts */
+	ccp5_disable_queue_interrupts(ccp);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+
+		/* Turn off the run bit */
+		iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
+
+		/* Clear the interrupt status */
+		iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_interrupt_status);
+		ioread32(cmd_q->reg_int_status);
+		ioread32(cmd_q->reg_status);
+	}
+
+	/* Stop the queue kthreads */
+	for (i = 0; i < ccp->cmd_q_count; i++)
+		if (ccp->cmd_q[i].kthread)
+			kthread_stop(ccp->cmd_q[i].kthread);
+
+	sp_free_ccp_irq(ccp->sp, ccp);
+
+	/* Release the per-queue resources that ccp5_init() allocated */
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		cmd_q = &ccp->cmd_q[i];
+		dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
+				  cmd_q->qbase_dma);
+		dma_pool_destroy(cmd_q->dma_pool);
+	}
+
+	/* Flush the cmd and backlog queue */
+	while (!list_empty(&ccp->cmd)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+	while (!list_empty(&ccp->backlog)) {
+		/* Invoke the callback directly with an error code */
+		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
+		list_del(&cmd->entry);
+		cmd->callback(cmd->data, -ENODEV);
+	}
+}
+
+/* Setup hook for ccpv5a, also called at the end of ccp5other_config():
+ * writes 0 to the request-ID configuration register.
+ */
+static void ccp5_config(struct ccp_device *ccp)
+{
+	/* Public side */
+	iowrite32(0x0, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
+}
+
+/* Setup hook for ccpv5b: programs the TRNG control, configuration, AES
+ * mask, queue mask/priority, command timeout, private LSB mask and
+ * clock-gating registers with fixed values, then applies ccp5_config().
+ * NOTE(review): the meaning of the individual register constants is not
+ * visible here -- check the hardware documentation before changing them.
+ */
+static void ccp5other_config(struct ccp_device *ccp)
+{
+	int i;
+	u32 rnd;
+
+	/* We own all of the queues on the NTB CCP */
+
+	iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
+	iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
+	/* Feed twelve words read from the TRNG into the AES mask register */
+	for (i = 0; i < 12; i++) {
+		rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
+		iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
+	}
+
+	iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
+	iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
+	iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
+
+	/* ccp5_init() later copies these private masks to the public ones */
+	iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
+	iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
+
+	iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
+
+	ccp5_config(ccp);
+}
+
+/* Dispatch table for version 5 CCP devices. Version 5 adds some function
+ * (a 3DES handler), but is essentially the same as v3: every hook filled
+ * in by the v3 table is filled in here too, including the interrupt
+ * handler.
+ */
+static const struct ccp_actions ccp5_actions = {
+	.aes = ccp5_perform_aes,
+	.xts_aes = ccp5_perform_xts_aes,
+	.sha = ccp5_perform_sha,
+	.des3 = ccp5_perform_des3,
+	.rsa = ccp5_perform_rsa,
+	.passthru = ccp5_perform_passthru,
+	.ecc = ccp5_perform_ecc,
+	.sballoc = ccp_lsb_alloc,
+	.sbfree = ccp_lsb_free,
+	.init = ccp5_init,
+	.destroy = ccp5_destroy,
+	.get_free_slots = ccp5_get_free_slots,
+	.irqhandler = ccp5_irq_handler,
+};
+
+/* Version data for a v5 CCP that is set up with ccp5_config(). */
+const struct ccp_vdata ccpv5a = {
+	.version = CCP_VERSION(5, 0),
+	.setup = ccp5_config,
+	.perform = &ccp5_actions,
+	.offset = 0x0,
+	.rsamax = CCP5_RSA_MAX_WIDTH,
+};
+
+/* Version data for a v5 CCP that is set up with ccp5other_config() (the
+ * "NTB CCP", per that function's comment). Unlike ccpv5a it also sets
+ * DMA_PRIVATE as its DMA channel attribute.
+ */
+const struct ccp_vdata ccpv5b = {
+	.version = CCP_VERSION(5, 0),
+	.dma_chan_attr = DMA_PRIVATE,
+	.setup = ccp5other_config,
+	.perform = &ccp5_actions,
+	.offset = 0x0,
+	.rsamax = CCP5_RSA_MAX_WIDTH,
+};
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
new file mode 100644
index 000000000..b8c94a01c
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -0,0 +1,643 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/cpu.h>
+#ifdef CONFIG_X86
+#include <asm/cpu_device_id.h>
+#endif
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+struct ccp_tasklet_data {
+	struct completion completion;	/* completed by ccp_do_cmd_complete() */
+	struct ccp_cmd *cmd;		/* cmd whose callback is to be run */
+};
+
+/* Human-readable error strings, indexed by the CCP error code */
+#define CCP_MAX_ERROR_CODE	64
+static const char * const ccp_error_codes[] = {
+	"",
+	"ILLEGAL_ENGINE",
+	"ILLEGAL_KEY_ID",
+	"ILLEGAL_FUNCTION_TYPE",
+	"ILLEGAL_FUNCTION_MODE",
+	"ILLEGAL_FUNCTION_ENCRYPT",
+	"ILLEGAL_FUNCTION_SIZE",
+	"Zlib_MISSING_INIT_EOM",
+	"ILLEGAL_FUNCTION_RSVD",
+	"ILLEGAL_BUFFER_LENGTH",
+	"VLSB_FAULT",
+	"ILLEGAL_MEM_ADDR",
+	"ILLEGAL_MEM_SEL",
+	"ILLEGAL_CONTEXT_ID",
+	"ILLEGAL_KEY_ADDR",
+	"0xF Reserved",
+	"Zlib_ILLEGAL_MULTI_QUEUE",
+	"Zlib_ILLEGAL_JOBID_CHANGE",
+	"CMD_TIMEOUT",
+	"IDMA0_AXI_SLVERR",
+	"IDMA0_AXI_DECERR",
+	"0x15 Reserved",
+	"IDMA1_AXI_SLAVE_FAULT",
+	"IDMA1_AXI_DECERR",
+	"0x18 Reserved",
+	"ZLIBVHB_AXI_SLVERR",
+	"ZLIBVHB_AXI_DECERR",
+	"0x1B Reserved",
+	"ZLIB_UNEXPECTED_EOM",
+	"ZLIB_EXTRA_DATA",
+	"ZLIB_BTYPE",
+	"ZLIB_UNDEFINED_SYMBOL",
+	"ZLIB_UNDEFINED_DISTANCE_S",
+	"ZLIB_CODE_LENGTH_SYMBOL",
+	"ZLIB_VHB_ILLEGAL_FETCH",
+	"ZLIB_UNCOMPRESSED_LEN",
+	"ZLIB_LIMIT_REACHED",
+	"ZLIB_CHECKSUM_MISMATCH0",
+	"ODMA0_AXI_SLVERR",
+	"ODMA0_AXI_DECERR",
+	"0x28 Reserved",
+	"ODMA1_AXI_SLVERR",
+	"ODMA1_AXI_DECERR",
+};
+
+void ccp_log_error(struct ccp_device *d, unsigned int e)
+{
+	if (WARN_ON(e >= CCP_MAX_ERROR_CODE))
+		return;
+	/* In-range codes past the end of the table have no name */
+	if (e < ARRAY_SIZE(ccp_error_codes))
+		dev_err(d->dev, "CCP error %u: %s\n", e, ccp_error_codes[e]);
+	else
+		dev_err(d->dev, "CCP error %u: Unknown Error\n", e);
+}
+
+/* List of CCPs, CCP count, read-write access lock, and access functions
+ *
+ * Lock structure: get ccp_unit_lock for reading whenever we need to
+ * examine the CCP list. While holding it for reading we can acquire
+ * the RR lock to update the round-robin next-CCP pointer. The unit lock
+ * must be acquired before the RR lock.
+ *
+ * If the unit-lock is acquired for writing, we have total control over
+ * the list, so there's no value in getting the RR lock.
+ */
+static DEFINE_RWLOCK(ccp_unit_lock);
+static LIST_HEAD(ccp_units);
+
+/* Round-robin counter */
+static DEFINE_SPINLOCK(ccp_rr_lock);
+static struct ccp_device *ccp_rr;
+
+/**
+ * ccp_add_device - add a CCP device to the list
+ *
+ * @ccp: ccp_device struct pointer
+ *
+ * Append this CCP to the tail of the unit list, which makes it
+ * available for use, and make it the round-robin starting point
+ * if no unit currently holds that role.
+ */
+void ccp_add_device(struct ccp_device *ccp)
+{
+	unsigned long irq_flags;
+
+	write_lock_irqsave(&ccp_unit_lock, irq_flags);
+	/* Holding the list lock for writing means the round-robin
+	 * pointer can't change on us, so seed it if it is unset.
+	 */
+	if (!ccp_rr)
+		ccp_rr = ccp;
+	list_add_tail(&ccp->entry, &ccp_units);
+
+	write_unlock_irqrestore(&ccp_unit_lock, irq_flags);
+}
+
+/**
+ * ccp_del_device - remove a CCP device from the list
+ *
+ * @ccp: ccp_device struct pointer
+ *
+ * Take this unit off the list of devices. If it is the unit the
+ * round-robin pointer refers to, advance the pointer first. Once
+ * the list is empty the pointer is set to NULL.
+ */
+void ccp_del_device(struct ccp_device *ccp)
+{
+	unsigned long irq_flags;
+
+	write_lock_irqsave(&ccp_unit_lock, irq_flags);
+	/* ccp_unit_lock is read/write; any read access will be
+	 * suspended while we make changes to the list and RR
+	 * pointer. Step the RR pointer past the departing unit,
+	 * wrapping to the head of the list when it is the last one.
+	 */
+	if (ccp_rr == ccp)
+		ccp_rr = list_is_last(&ccp->entry, &ccp_units) ?
+			 list_first_entry(&ccp_units, struct ccp_device,
+					  entry) :
+			 list_next_entry(ccp, entry);
+
+	list_del(&ccp->entry);
+	if (list_empty(&ccp_units))
+		ccp_rr = NULL;
+	write_unlock_irqrestore(&ccp_unit_lock, irq_flags);
+}
+
+
+
+int ccp_register_rng(struct ccp_device *ccp)
+{
+	int rc;
+
+	dev_dbg(ccp->dev, "Registering RNG...\n");
+	/* Name the hwrng after this CCP and hook up its read routine */
+	ccp->hwrng.name = ccp->rngname;
+	ccp->hwrng.read = ccp_trng_read;
+
+	rc = hwrng_register(&ccp->hwrng);
+	if (rc)
+		dev_err(ccp->dev, "error registering hwrng (%d)\n", rc);
+	return rc;
+}
+
+void ccp_unregister_rng(struct ccp_device *ccp)
+{
+	if (ccp->hwrng.name)	/* name is assigned in ccp_register_rng() */
+		hwrng_unregister(&ccp->hwrng);
+}
+
+static struct ccp_device *ccp_get_device(void)
+{
+	struct ccp_device *unit = NULL;
+	unsigned long irq_flags;
+
+	/* Hand out units round-robin: ccp_rr names the unit to return
+	 * now and is then stepped to its successor, wrapping at the end.
+	 */
+	read_lock_irqsave(&ccp_unit_lock, irq_flags);
+	if (list_empty(&ccp_units))
+		goto unlock;
+
+	spin_lock(&ccp_rr_lock);
+	unit = ccp_rr;
+	ccp_rr = list_is_last(&unit->entry, &ccp_units) ?
+		 list_first_entry(&ccp_units, struct ccp_device, entry) :
+		 list_next_entry(unit, entry);
+	spin_unlock(&ccp_rr_lock);
+
+unlock:
+	read_unlock_irqrestore(&ccp_unit_lock, irq_flags);
+	return unit;
+}
+
+/**
+ * ccp_present - check if a CCP device is present
+ *
+ * Returns zero if at least one CCP is on the unit list, -ENODEV otherwise.
+ */
+int ccp_present(void)
+{
+	unsigned long irq_flags;
+	bool have_unit;
+
+	read_lock_irqsave(&ccp_unit_lock, irq_flags);
+	have_unit = !list_empty(&ccp_units);
+	read_unlock_irqrestore(&ccp_unit_lock, irq_flags);
+
+	return have_unit ? 0 : -ENODEV;
+}
+EXPORT_SYMBOL_GPL(ccp_present);
+
+/**
+ * ccp_version - get the version of the CCP device
+ *
+ * Returns the version from the first unit on the list;
+ * otherwise a zero if no CCP device is present. The list
+ * is only read, under ccp_unit_lock.
+ */
+unsigned int ccp_version(void)
+{
+	unsigned int version = 0;
+	unsigned long irq_flags;
+	struct ccp_device *first;
+
+	read_lock_irqsave(&ccp_unit_lock, irq_flags);
+	first = list_first_entry_or_null(&ccp_units, struct ccp_device, entry);
+	if (first)
+		version = first->vdata->version;
+	read_unlock_irqrestore(&ccp_unit_lock, irq_flags);
+
+	return version;
+}
+EXPORT_SYMBOL_GPL(ccp_version);
+
+/**
+ * ccp_enqueue_cmd - queue an operation for processing by the CCP
+ *
+ * @cmd: ccp_cmd struct to be processed
+ *
+ * Queue a cmd to be processed by the CCP. If queueing the cmd
+ * would exceed the defined length of the cmd queue the cmd will
+ * only be queued if the CCP_CMD_MAY_BACKLOG flag is set and will
+ * result in a return code of -EBUSY; without the flag the cmd is
+ * not queued and -ENOSPC is returned.
+ *
+ * The callback routine specified in the ccp_cmd struct will be
+ * called to notify the caller of completion (if the cmd was not
+ * backlogged) or advancement out of the backlog. If the cmd has
+ * advanced out of the backlog the "err" value of the callback
+ * will be -EINPROGRESS. Any other "err" value during callback is
+ * the result of the operation.
+ *
+ * The cmd has been successfully queued if:
+ *   the return code is -EINPROGRESS or
+ *   the return code is -EBUSY and CCP_CMD_MAY_BACKLOG flag is set
+ * Returns -ENODEV if no CCP is available, -EINVAL without a callback.
+ */
+int ccp_enqueue_cmd(struct ccp_cmd *cmd)
+{
+	struct ccp_device *ccp;
+	unsigned long irq_flags;
+	unsigned int idle;
+	int rc;
+
+	/* Some commands might need to be sent to a specific device */
+	ccp = cmd->ccp;
+	if (!ccp)
+		ccp = ccp_get_device();
+	if (!ccp)
+		return -ENODEV;
+
+	/* Caller must supply a callback routine */
+	if (!cmd->callback)
+		return -EINVAL;
+
+	cmd->ccp = ccp;
+
+	spin_lock_irqsave(&ccp->cmd_lock, irq_flags);
+
+	/* An index equal to the queue count means "nothing to wake" */
+	idle = ccp->cmd_q_count;
+
+	if (ccp->cmd_count < MAX_CMD_QLEN) {
+		rc = -EINPROGRESS;
+		ccp->cmd_count++;
+		list_add_tail(&cmd->entry, &ccp->cmd);
+
+		/* Find an idle queue, unless we are suspending */
+		if (!ccp->suspending) {
+			for (idle = 0; idle < ccp->cmd_q_count; idle++) {
+				if (!ccp->cmd_q[idle].active)
+					break;
+			}
+		}
+	} else if (cmd->flags & CCP_CMD_MAY_BACKLOG) {
+		rc = -EBUSY;
+		list_add_tail(&cmd->entry, &ccp->backlog);
+	} else {
+		rc = -ENOSPC;
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, irq_flags);
+
+	/* If we found an idle queue, wake it up */
+	if (idle < ccp->cmd_q_count)
+		wake_up_process(ccp->cmd_q[idle].kthread);
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(ccp_enqueue_cmd);
+
+static void ccp_do_cmd_backlog(struct work_struct *work)
+{
+	struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work);
+	struct ccp_device *ccp = cmd->ccp;
+	unsigned long irq_flags;
+	unsigned int idle;
+
+	/* Tell the submitter its cmd has advanced out of the backlog */
+	cmd->callback(cmd->data, -EINPROGRESS);
+
+	spin_lock_irqsave(&ccp->cmd_lock, irq_flags);
+
+	/* Move the cmd onto the main cmd list */
+	ccp->cmd_count++;
+	list_add_tail(&cmd->entry, &ccp->cmd);
+
+	/* Find an idle queue */
+	for (idle = 0; idle < ccp->cmd_q_count; idle++) {
+		if (!ccp->cmd_q[idle].active)
+			break;
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, irq_flags);
+
+	/* If we found an idle queue, wake it up */
+	if (idle < ccp->cmd_q_count)
+		wake_up_process(ccp->cmd_q[idle].kthread);
+}
+
+static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q)
+{
+	struct ccp_device *ccp = cmd_q->ccp;
+	struct ccp_cmd *next = NULL;
+	struct ccp_cmd *promoted = NULL;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&ccp->cmd_lock, irq_flags);
+
+	cmd_q->active = 0;
+
+	if (ccp->suspending) {
+		/* Park this queue and let the suspend path know */
+		cmd_q->suspended = 1;
+
+		spin_unlock_irqrestore(&ccp->cmd_lock, irq_flags);
+		wake_up_interruptible(&ccp->suspend_queue);
+		return NULL;
+	}
+
+	if (ccp->cmd_count) {
+		next = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
+		list_del(&next->entry);
+		ccp->cmd_count--;
+		cmd_q->active = 1;
+	}
+
+	/* Take at most one cmd off the backlog for promotion */
+	if (!list_empty(&ccp->backlog)) {
+		promoted = list_first_entry(&ccp->backlog, struct ccp_cmd,
+					    entry);
+		list_del(&promoted->entry);
+	}
+
+	spin_unlock_irqrestore(&ccp->cmd_lock, irq_flags);
+
+	/* The promotion itself is deferred to a work item, outside cmd_lock */
+	if (promoted) {
+		INIT_WORK(&promoted->work, ccp_do_cmd_backlog);
+		schedule_work(&promoted->work);
+	}
+
+	return next;
+}
+
+static void ccp_do_cmd_complete(unsigned long data)
+{
+	struct ccp_tasklet_data *td = (struct ccp_tasklet_data *)data;
+
+	/* Report the cmd result to its submitter, then signal the
+	 * completion the queue thread is waiting on.
+	 */
+	td->cmd->callback(td->cmd->data, td->cmd->ret);
+	complete(&td->completion);
+}
+
+/**
+ * ccp_cmd_queue_thread - kernel thread body that services one CCP queue
+ *
+ * @data: thread-specific data; used as the struct ccp_cmd_queue to service
+ */
+int ccp_cmd_queue_thread(void *data)
+{
+	struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data;
+	struct ccp_cmd *cmd;
+	struct ccp_tasklet_data tdata;
+	struct tasklet_struct tasklet;
+
+	tasklet_init(&tasklet, ccp_do_cmd_complete, (unsigned long)&tdata);
+
+	set_current_state(TASK_INTERRUPTIBLE);
+	while (!kthread_should_stop()) {
+		schedule();
+		/* Re-arm the sleep state before checking for a cmd */
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		cmd = ccp_dequeue_cmd(cmd_q);
+		if (!cmd)
+			continue;
+		/* Got a cmd; stay runnable while it is processed */
+		__set_current_state(TASK_RUNNING);
+
+		/* Execute the command */
+		cmd->ret = ccp_run_cmd(cmd_q, cmd);
+
+		/* Run the completion callback from a tasklet and wait for it */
+		tdata.cmd = cmd;
+		init_completion(&tdata.completion);
+		tasklet_schedule(&tasklet);
+		wait_for_completion(&tdata.completion);
+	}
+
+	__set_current_state(TASK_RUNNING);
+
+	return 0;
+}
+
+/**
+ * ccp_alloc_struct - allocate and initialize the ccp_device struct
+ *
+ * @sp: sp_device struct that the CCP belongs to
+ */
+struct ccp_device *ccp_alloc_struct(struct sp_device *sp)
+{
+	struct ccp_device *ccp;
+
+	/* Device-managed, zero-filled allocation; NULL on failure */
+	ccp = devm_kzalloc(sp->dev, sizeof(*ccp), GFP_KERNEL);
+	if (!ccp)
+		return NULL;
+
+	ccp->sp = sp;
+	ccp->dev = sp->dev;
+	ccp->axcache = sp->axcache;
+	ccp->sb_start = 0;
+	ccp->sb_count = KSB_COUNT;
+
+	snprintf(ccp->name, MAX_CCP_NAME_LEN, "ccp-%u", sp->ord);
+	snprintf(ccp->rngname, MAX_CCP_NAME_LEN, "ccp-%u-rng", sp->ord);
+
+	spin_lock_init(&ccp->cmd_lock);
+	mutex_init(&ccp->req_mutex);
+	mutex_init(&ccp->sb_mutex);
+
+	/* Initialize the cmd lists and the wait queues */
+	INIT_LIST_HEAD(&ccp->cmd);
+	INIT_LIST_HEAD(&ccp->backlog);
+	init_waitqueue_head(&ccp->sb_queue);
+	init_waitqueue_head(&ccp->suspend_queue);
+
+	return ccp;
+}
+
+int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait)
+{
+	struct ccp_device *ccp = container_of(rng, struct ccp_device, hwrng);
+	u32 trng_value;
+	/* Compare as size_t: narrowing a large max to int could go negative */
+	size_t len = min_t(size_t, sizeof(trng_value), max);
+
+	/* hwrng read routine: hands back at most one 32-bit TRNG word.
+	 * Locking is provided by the caller so we can update device
+	 * hwrng-related fields safely
+	 */
+	trng_value = ioread32(ccp->io_regs + TRNG_OUT_REG);
+	if (!trng_value) {
+		/* Zero is returned if no data is available or if a
+		 * bad-entropy error is present. Assume an error if
+		 * we exceed TRNG_RETRIES reads of zero.
+		 */
+		if (ccp->hwrng_retries++ > TRNG_RETRIES)
+			return -EIO;
+		return 0;
+	}
+
+	/* Reset the counter and save the rng value */
+	ccp->hwrng_retries = 0;
+	memcpy(data, &trng_value, len);
+	return len;
+}
+
+#ifdef CONFIG_PM
+bool ccp_queues_suspended(struct ccp_device *ccp)
+{
+	unsigned long irq_flags;
+	unsigned int parked = 0;
+	unsigned int q;
+
+	/* Count the queues that have marked themselves suspended */
+	spin_lock_irqsave(&ccp->cmd_lock, irq_flags);
+
+	for (q = 0; q < ccp->cmd_q_count; q++)
+		parked += ccp->cmd_q[q].suspended ? 1 : 0;
+	spin_unlock_irqrestore(&ccp->cmd_lock, irq_flags);
+
+	/* All queues are suspended when the tally equals the queue count */
+	return parked == ccp->cmd_q_count;
+}
+
+int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
+{
+	struct ccp_device *ccp = sp->ccp_data;
+	unsigned long irq_flags;
+	unsigned int q;
+
+	/* If there's no device there's nothing to do */
+	if (!ccp)
+		return 0;
+
+	/* Flag the suspend and wake every queue kthread so each one
+	 * notices it; both happen under cmd_lock.
+	 */
+	spin_lock_irqsave(&ccp->cmd_lock, irq_flags);
+
+	ccp->suspending = 1;
+	for (q = 0; q < ccp->cmd_q_count; q++)
+		wake_up_process(ccp->cmd_q[q].kthread);
+	spin_unlock_irqrestore(&ccp->cmd_lock, irq_flags);
+
+	/* Wait for all queue kthreads to say they're done */
+	while (!ccp_queues_suspended(ccp))
+		wait_event_interruptible(ccp->suspend_queue,
+					 ccp_queues_suspended(ccp));
+
+	return 0;
+}
+
+int ccp_dev_resume(struct sp_device *sp)
+{
+	struct ccp_device *ccp = sp->ccp_data;
+	struct ccp_cmd_queue *cmd_q;
+	unsigned long irq_flags;
+	unsigned int q;
+
+	/* If there's no device there's nothing to do */
+	if (!ccp)
+		return 0;
+
+	spin_lock_irqsave(&ccp->cmd_lock, irq_flags);
+	ccp->suspending = 0;
+
+	/* Clear each queue's suspended mark and wake its kthread */
+	for (q = 0; q < ccp->cmd_q_count; q++) {
+		cmd_q = &ccp->cmd_q[q];
+		cmd_q->suspended = 0;
+		wake_up_process(cmd_q->kthread);
+	}
+	spin_unlock_irqrestore(&ccp->cmd_lock, irq_flags);
+
+	return 0;
+}
+#endif
+
+int ccp_dev_init(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	struct ccp_device *ccp;
+	int ret;
+
+	ccp = ccp_alloc_struct(sp);
+	if (!ccp) {
+		ret = -ENOMEM;
+		goto e_err;
+	}
+	sp->ccp_data = ccp;
+
+	/* The version-specific data must be present and carry a version */
+	ccp->vdata = (struct ccp_vdata *)sp->dev_vdata->ccp_vdata;
+	if (!ccp->vdata || !ccp->vdata->version) {
+		dev_err(dev, "missing driver data\n");
+		ret = -ENODEV;
+		goto e_err;
+	}
+
+	ccp->use_tasklet = sp->use_tasklet;
+	ccp->io_regs = sp->io_map + ccp->vdata->offset;
+
+	/* Optional version-specific setup hook, then the init hook */
+	if (ccp->vdata->setup)
+		ccp->vdata->setup(ccp);
+
+	ret = ccp->vdata->perform->init(ccp);
+	if (ret)
+		goto e_err;
+
+	dev_notice(dev, "ccp enabled\n");
+	return 0;
+
+e_err:
+	sp->ccp_data = NULL;
+	dev_notice(dev, "ccp initialization failed\n");
+	return ret;
+}
+
+void ccp_dev_destroy(struct sp_device *sp)
+{
+	struct ccp_device *ccp;
+
+	/* ccp_data is NULL when ccp_dev_init() failed: nothing to undo */
+	ccp = sp->ccp_data;
+	if (ccp)
+		ccp->vdata->perform->destroy(ccp);
+}
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
new file mode 100644
index 000000000..bd43b5c14
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -0,0 +1,675 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __CCP_DEV_H__
+#define __CCP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/dmaengine.h>
+
+#include "sp-dev.h"
+
+#define MAX_CCP_NAME_LEN		16
+#define MAX_DMAPOOL_NAME_LEN		32
+
+#define MAX_HW_QUEUES			5
+#define MAX_CMD_QLEN			100
+
+#define TRNG_RETRIES			10
+
+#define CACHE_NONE			0x00
+#define CACHE_WB_NO_ALLOC		0xb7
+
+/****** Register Mappings ******/
+#define Q_MASK_REG			0x000
+#define TRNG_OUT_REG			0x00c
+#define IRQ_MASK_REG			0x040
+#define IRQ_STATUS_REG			0x200
+
+#define DEL_CMD_Q_JOB			0x124
+#define DEL_Q_ACTIVE			0x00000200
+#define DEL_Q_ID_SHIFT			6
+
+#define CMD_REQ0			0x180
+#define CMD_REQ_INCR			0x04
+
+#define CMD_Q_STATUS_BASE		0x210
+#define CMD_Q_INT_STATUS_BASE		0x214
+#define CMD_Q_STATUS_INCR		0x20
+
+#define CMD_Q_CACHE_BASE		0x228
+#define CMD_Q_CACHE_INC			0x20
+
+#define CMD_Q_ERROR(__qs)		((__qs) & 0x0000003f)
+#define CMD_Q_DEPTH(__qs)		(((__qs) >> 12) & 0x0000000f)
+
+/* ------------------------ CCP Version 5 Specifics ------------------------ */
+#define CMD5_QUEUE_MASK_OFFSET		0x00
+#define	CMD5_QUEUE_PRIO_OFFSET		0x04
+#define CMD5_REQID_CONFIG_OFFSET	0x08
+#define	CMD5_CMD_TIMEOUT_OFFSET		0x10
+#define LSB_PUBLIC_MASK_LO_OFFSET	0x18
+#define LSB_PUBLIC_MASK_HI_OFFSET	0x1C
+#define LSB_PRIVATE_MASK_LO_OFFSET	0x20
+#define LSB_PRIVATE_MASK_HI_OFFSET	0x24
+#define CMD5_PSP_CCP_VERSION		0x100
+
+#define CMD5_Q_CONTROL_BASE		0x0000
+#define CMD5_Q_TAIL_LO_BASE		0x0004
+#define CMD5_Q_HEAD_LO_BASE		0x0008
+#define CMD5_Q_INT_ENABLE_BASE		0x000C
+#define CMD5_Q_INTERRUPT_STATUS_BASE	0x0010
+
+#define CMD5_Q_STATUS_BASE		0x0100
+#define CMD5_Q_INT_STATUS_BASE		0x0104
+#define CMD5_Q_DMA_STATUS_BASE		0x0108
+#define CMD5_Q_DMA_READ_STATUS_BASE	0x010C
+#define CMD5_Q_DMA_WRITE_STATUS_BASE	0x0110
+#define CMD5_Q_ABORT_BASE		0x0114
+#define CMD5_Q_AX_CACHE_BASE		0x0118
+
+#define	CMD5_CONFIG_0_OFFSET		0x6000
+#define	CMD5_TRNG_CTL_OFFSET		0x6008
+#define	CMD5_AES_MASK_OFFSET		0x6010
+#define	CMD5_CLK_GATE_CTL_OFFSET	0x603C
+
+/* Address offset between two virtual queue registers */
+#define CMD5_Q_STATUS_INCR		0x1000
+
+/* Bit masks and queue sizing; Q_PTR_MASK is 2^(QUEUE_SIZE_VAL + 6) - 1 */
+#define CMD5_Q_RUN			0x1
+#define CMD5_Q_HALT			0x2
+#define CMD5_Q_MEM_LOCATION		0x4
+#define CMD5_Q_SIZE			0x1F
+#define CMD5_Q_SHIFT			3
+#define COMMANDS_PER_QUEUE		16
+#define QUEUE_SIZE_VAL			((ffs(COMMANDS_PER_QUEUE) - 2) & \
+					  CMD5_Q_SIZE)
+#define Q_PTR_MASK			((2 << (QUEUE_SIZE_VAL + 5)) - 1)
+#define Q_DESC_SIZE			sizeof(struct ccp5_desc)
+#define Q_SIZE(n)			(COMMANDS_PER_QUEUE*(n))
+
+#define INT_COMPLETION			0x1
+#define INT_ERROR			0x2
+#define INT_QUEUE_STOPPED		0x4
+#define	INT_EMPTY_QUEUE			0x8
+#define SUPPORTED_INTERRUPTS		(INT_COMPLETION | INT_ERROR)
+
+#define LSB_REGION_WIDTH		5
+#define MAX_LSB_CNT			8
+
+#define LSB_SIZE			16
+#define LSB_ITEM_SIZE			32
+#define PLSB_MAP_SIZE			(LSB_SIZE)
+#define SLSB_MAP_SIZE			(MAX_LSB_CNT * LSB_SIZE)
+/* Index of the LSB_ITEM_SIZE-byte entry that contains byte LSB_ADDR */
+#define LSB_ENTRY_NUMBER(LSB_ADDR)	((LSB_ADDR) / LSB_ITEM_SIZE)
+
+/* ------------------------ CCP Version 3 Specifics ------------------------ */
+#define REQ0_WAIT_FOR_WRITE		0x00000004
+#define REQ0_INT_ON_COMPLETE		0x00000002
+#define REQ0_STOP_ON_COMPLETE		0x00000001
+
+#define REQ0_CMD_Q_SHIFT		9
+#define REQ0_JOBID_SHIFT		3
+
+/****** REQ1 Related Values ******/
+#define REQ1_PROTECT_SHIFT		27
+#define REQ1_ENGINE_SHIFT		23
+#define REQ1_KEY_KSB_SHIFT		2
+
+#define REQ1_EOM			0x00000002
+#define REQ1_INIT			0x00000001
+
+/* AES Related Values */
+#define REQ1_AES_TYPE_SHIFT		21
+#define REQ1_AES_MODE_SHIFT		18
+#define REQ1_AES_ACTION_SHIFT		17
+#define REQ1_AES_CFB_SIZE_SHIFT		10
+
+/* XTS-AES Related Values */
+#define REQ1_XTS_AES_SIZE_SHIFT		10
+
+/* SHA Related Values */
+#define REQ1_SHA_TYPE_SHIFT		21
+
+/* RSA Related Values */
+#define REQ1_RSA_MOD_SIZE_SHIFT		10
+
+/* Pass-Through Related Values */
+#define REQ1_PT_BW_SHIFT		12
+#define REQ1_PT_BS_SHIFT		10
+
+/* ECC Related Values */
+#define REQ1_ECC_AFFINE_CONVERT		0x00200000
+#define REQ1_ECC_FUNCTION_SHIFT		18
+
+/****** REQ4 Related Values ******/
+#define REQ4_KSB_SHIFT			18
+#define REQ4_MEMTYPE_SHIFT		16
+
+/****** REQ6 Related Values ******/
+#define REQ6_MEMTYPE_SHIFT		16
+
+/****** Key Storage Block ******/
+#define KSB_START			77
+#define KSB_END				127
+#define KSB_COUNT			(KSB_END - KSB_START + 1)
+#define CCP_SB_BITS			256
+
+#define CCP_JOBID_MASK			0x0000003f
+
+/* ------------------------ General CCP Defines ------------------------ */
+
+#define	CCP_DMA_DFLT			0x0
+#define	CCP_DMA_PRIV			0x1
+#define	CCP_DMA_PUB			0x2
+
+#define CCP_DMAPOOL_MAX_SIZE		64
+#define CCP_DMAPOOL_ALIGN		BIT(5)
+
+#define CCP_REVERSE_BUF_SIZE		64
+
+#define CCP_AES_KEY_SB_COUNT		1
+#define CCP_AES_CTX_SB_COUNT		1
+
+#define CCP_XTS_AES_KEY_SB_COUNT	1
+#define CCP5_XTS_AES_KEY_SB_COUNT	2
+#define CCP_XTS_AES_CTX_SB_COUNT	1
+
+#define CCP_DES3_KEY_SB_COUNT		1
+#define CCP_DES3_CTX_SB_COUNT		1
+
+#define CCP_SHA_SB_COUNT		1
+
+#define CCP_RSA_MAX_WIDTH		4096
+#define CCP5_RSA_MAX_WIDTH		16384
+
+#define CCP_PASSTHRU_BLOCKSIZE		256
+#define CCP_PASSTHRU_MASKSIZE		32
+#define CCP_PASSTHRU_SB_COUNT		1
+
+#define CCP_ECC_MODULUS_BYTES		48      /* 384-bits */
+#define CCP_ECC_MAX_OPERANDS		6
+#define CCP_ECC_MAX_OUTPUTS		3
+#define CCP_ECC_SRC_BUF_SIZE		448
+#define CCP_ECC_DST_BUF_SIZE		192
+#define CCP_ECC_OPERAND_SIZE		64
+#define CCP_ECC_OUTPUT_SIZE		64
+#define CCP_ECC_RESULT_OFFSET		60
+#define CCP_ECC_RESULT_SUCCESS		0x0001
+
+#define CCP_SB_BYTES			32
+
+struct ccp_op;
+struct ccp_device;
+struct ccp_cmd;
+struct ccp_fns;
+
+struct ccp_dma_cmd {
+	struct list_head entry;
+
+	struct ccp_cmd ccp_cmd;
+};
+
+struct ccp_dma_desc {
+	struct list_head entry;
+
+	struct ccp_device *ccp;
+
+	struct list_head pending;
+	struct list_head active;
+
+	enum dma_status status;
+	struct dma_async_tx_descriptor tx_desc;
+	size_t len;
+};
+
+struct ccp_dma_chan {
+	struct ccp_device *ccp;
+
+	spinlock_t lock;
+	struct list_head created;
+	struct list_head pending;
+	struct list_head active;
+	struct list_head complete;
+
+	struct tasklet_struct cleanup_tasklet;
+
+	enum dma_status status;
+	struct dma_chan dma_chan;
+};
+
+struct ccp_cmd_queue {
+	struct ccp_device *ccp;
+
+	/* Queue identifier */
+	u32 id;
+
+	/* Queue dma pool */
+	struct dma_pool *dma_pool;
+
+	/* Queue base address (not necessarily aligned) */
+	struct ccp5_desc *qbase;
+
+	/* Aligned queue start address (per requirement) */
+	struct mutex q_mutex ____cacheline_aligned;
+	unsigned int qidx;
+
+	/* Version 5 has different requirements for queue memory */
+	unsigned int qsize;
+	dma_addr_t qbase_dma;
+	dma_addr_t qdma_tail;
+
+	/* Per-queue reserved storage block(s) */
+	u32 sb_key;
+	u32 sb_ctx;
+
+	/* Bitmap of LSBs that can be accessed by this queue */
+	DECLARE_BITMAP(lsbmask, MAX_LSB_CNT);
+	/* Private LSB that is assigned to this queue, or -1 if none.
+	 * Bitmap for my private LSB, unused otherwise
+	 */
+	int lsb;
+	DECLARE_BITMAP(lsbmap, PLSB_MAP_SIZE);
+
+	/* Queue processing thread */
+	struct task_struct *kthread;
+	unsigned int active;
+	unsigned int suspended;
+
+	/* Number of free command slots available */
+	unsigned int free_slots;
+
+	/* Interrupt masks */
+	u32 int_ok;
+	u32 int_err;
+
+	/* Register addresses for queue */
+	void __iomem *reg_control;
+	void __iomem *reg_tail_lo;
+	void __iomem *reg_head_lo;
+	void __iomem *reg_int_enable;
+	void __iomem *reg_interrupt_status;
+	void __iomem *reg_status;
+	void __iomem *reg_int_status;
+	void __iomem *reg_dma_status;
+	void __iomem *reg_dma_read_status;
+	void __iomem *reg_dma_write_status;
+	u32 qcontrol; /* Cached control register */
+
+	/* Status values from job */
+	u32 int_status;
+	u32 q_status;
+	u32 q_int_status;
+	u32 cmd_error;
+
+	/* Interrupt wait queue */
+	wait_queue_head_t int_queue;
+	unsigned int int_rcvd;
+
+	/* Per-queue Statistics */
+	unsigned long total_ops;
+	unsigned long total_aes_ops;
+	unsigned long total_xts_aes_ops;
+	unsigned long total_3des_ops;
+	unsigned long total_sha_ops;
+	unsigned long total_rsa_ops;
+	unsigned long total_pt_ops;
+	unsigned long total_ecc_ops;
+} ____cacheline_aligned;
+
+struct ccp_device {
+	struct list_head entry;
+
+	struct ccp_vdata *vdata;
+	unsigned int ord;
+	char name[MAX_CCP_NAME_LEN];
+	char rngname[MAX_CCP_NAME_LEN];
+
+	struct device *dev;
+	struct sp_device *sp;
+
+	/* Bus specific device information
+	 */
+	void *dev_specific;
+	unsigned int qim;
+	unsigned int irq;
+	bool use_tasklet;
+	struct tasklet_struct irq_tasklet;
+
+	/* I/O area used for device communication. The register mapping
+	 * starts at an offset into the mapped bar.
+	 *   The CMD_REQx registers and the Delete_Cmd_Queue_Job register
+	 *   need to be protected while a command queue thread is accessing
+	 *   them.
+	 */
+	struct mutex req_mutex ____cacheline_aligned;
+	void __iomem *io_regs;
+
+	/* Master lists that all cmds are queued on. Because there can be
+	 * more than one CCP command queue that can process a cmd a separate
+	 * backlog list is needed so that the backlog completion call
+	 * completes before the cmd is available for execution.
+	 */
+	spinlock_t cmd_lock ____cacheline_aligned;
+	unsigned int cmd_count;
+	struct list_head cmd;
+	struct list_head backlog;
+
+	/* The command queues. These represent the queues available on the
+	 * CCP that are available for processing cmds
+	 */
+	struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
+	unsigned int cmd_q_count;
+
+	/* Support for the CCP True RNG
+	 */
+	struct hwrng hwrng;
+	unsigned int hwrng_retries;
+
+	/* Support for the CCP DMA capabilities
+	 */
+	struct dma_device dma_dev;
+	struct ccp_dma_chan *ccp_dma_chan;
+	struct kmem_cache *dma_cmd_cache;
+	struct kmem_cache *dma_desc_cache;
+
+	/* A counter used to generate job-ids for cmds submitted to the CCP
+	 */
+	atomic_t current_id ____cacheline_aligned;
+
+	/* The v3 CCP uses key storage blocks (SB) to maintain context for
+	 * certain operations. To prevent multiple cmds from using the same
+	 * SB range a command queue reserves an SB range for the duration of
+	 * the cmd. Each queue, will however, reserve 2 SB blocks for
+	 * operations that only require single SB entries (eg. AES context/iv
+	 * and key) in order to avoid allocation contention.  This will reserve
+	 * at most 10 SB entries, leaving 40 SB entries available for dynamic
+	 * allocation.
+	 *
+	 * The v5 CCP Local Storage Block (LSB) is broken up into 8
+	 * memory ranges, each of which can be enabled for access by one
+	 * or more queues. Device initialization takes this into account,
+	 * and attempts to assign one region for exclusive use by each
+	 * available queue; the rest are then aggregated as "public" use.
+	 * If there are fewer regions than queues, all regions are shared
+	 * amongst all queues.
+	 */
+	struct mutex sb_mutex ____cacheline_aligned;
+	DECLARE_BITMAP(sb, KSB_COUNT);
+	wait_queue_head_t sb_queue;
+	unsigned int sb_avail;
+	unsigned int sb_count;
+	u32 sb_start;
+
+	/* Bitmap of shared LSBs, if any */
+	DECLARE_BITMAP(lsbmap, SLSB_MAP_SIZE);
+
+	/* Suspend support */
+	unsigned int suspending;
+	wait_queue_head_t suspend_queue;
+
+	/* DMA caching attribute support */
+	unsigned int axcache;
+
+	/* Device Statistics */
+	unsigned long total_interrupts;
+
+	/* DebugFS info */
+	struct dentry *debugfs_instance;
+};
+
+enum ccp_memtype {
+	CCP_MEMTYPE_SYSTEM = 0,
+	CCP_MEMTYPE_SB,
+	CCP_MEMTYPE_LOCAL,
+	CCP_MEMTYPE__LAST,
+};
+/* Alias: the LSB uses the same memory type value as the SB */
+#define	CCP_MEMTYPE_LSB	CCP_MEMTYPE_SB
+
+struct ccp_dma_info {
+	dma_addr_t address;
+	unsigned int offset;
+	unsigned int length;
+	enum dma_data_direction dir;
+} __packed __aligned(4);
+
+struct ccp_dm_workarea {
+	struct device *dev;
+	struct dma_pool *dma_pool;
+
+	u8 *address;
+	struct ccp_dma_info dma;
+	unsigned int length;
+};
+
+struct ccp_sg_workarea {
+	struct scatterlist *sg;
+	int nents;
+	unsigned int sg_used;
+
+	struct scatterlist *dma_sg;
+	struct scatterlist *dma_sg_head;
+	struct device *dma_dev;
+	unsigned int dma_count;
+	enum dma_data_direction dma_dir;
+
+	u64 bytes_left;
+};
+
+struct ccp_data {
+	struct ccp_sg_workarea sg_wa;
+	struct ccp_dm_workarea dm_wa;
+};
+
+struct ccp_mem {
+	enum ccp_memtype type;
+	union {
+		struct ccp_dma_info dma;
+		u32 sb;
+	} u;
+};
+
+struct ccp_aes_op {
+	enum ccp_aes_type type;
+	enum ccp_aes_mode mode;
+	enum ccp_aes_action action;
+	unsigned int size;
+};
+
+struct ccp_xts_aes_op {
+	enum ccp_aes_type type;
+	enum ccp_aes_action action;
+	enum ccp_xts_aes_unit_size unit_size;
+};
+
+struct ccp_des3_op {
+	enum ccp_des3_type type;
+	enum ccp_des3_mode mode;
+	enum ccp_des3_action action;
+};
+
+struct ccp_sha_op {
+	enum ccp_sha_type type;
+	u64 msg_bits;
+};
+
+struct ccp_rsa_op {
+	u32 mod_size;
+	u32 input_len;
+};
+
+struct ccp_passthru_op {
+	enum ccp_passthru_bitwise bit_mod;
+	enum ccp_passthru_byteswap byte_swap;
+};
+
+struct ccp_ecc_op {
+	enum ccp_ecc_function function;
+};
+
+struct ccp_op {
+	struct ccp_cmd_queue *cmd_q;
+
+	u32 jobid;
+	u32 ioc;
+	u32 soc;
+	u32 sb_key;
+	u32 sb_ctx;
+	u32 init;
+	u32 eom;
+
+	struct ccp_mem src;
+	struct ccp_mem dst;
+	struct ccp_mem exp;
+
+	union {
+		struct ccp_aes_op aes;
+		struct ccp_xts_aes_op xts;
+		struct ccp_des3_op des3;
+		struct ccp_sha_op sha;
+		struct ccp_rsa_op rsa;
+		struct ccp_passthru_op passthru;
+		struct ccp_ecc_op ecc;
+	} u;
+};
+
+static inline u32 ccp_addr_lo(struct ccp_dma_info *info)
+{
+	return lower_32_bits(info->address + info->offset);
+}
+
+static inline u32 ccp_addr_hi(struct ccp_dma_info *info)
+{
+	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
+}
+
+/**
+ * descriptor for version 5 CCP commands
+ * 8 32-bit words:
+ * word 0: function; engine; control bits
+ * word 1: length of source data
+ * word 2: low 32 bits of source pointer
+ * word 3: upper 16 bits of source pointer; source memory type
+ * word 4: low 32 bits of destination pointer
+ * word 5: upper 16 bits of destination pointer; destination memory type
+ * word 6: low 32 bits of key pointer
+ * word 7: upper 16 bits of key pointer; key memory type
+ */
+struct dword0 {
+	unsigned int soc:1;
+	unsigned int ioc:1;
+	unsigned int rsvd1:1;
+	unsigned int init:1;
+	unsigned int eom:1;		/* AES/SHA only */
+	unsigned int function:15;
+	unsigned int engine:4;
+	unsigned int prot:1;
+	unsigned int rsvd2:7;
+};
+
+struct dword3 {
+	unsigned int  src_hi:16;
+	unsigned int  src_mem:2;
+	unsigned int  lsb_cxt_id:8;
+	unsigned int  rsvd1:5;
+	unsigned int  fixed:1;
+};
+
+union dword4 {
+	__le32 dst_lo;		/* NON-SHA	*/
+	__le32 sha_len_lo;	/* SHA		*/
+};
+
+union dword5 {
+	struct {
+		unsigned int  dst_hi:16;
+		unsigned int  dst_mem:2;
+		unsigned int  rsvd1:13;
+		unsigned int  fixed:1;
+	} fields;
+	__le32 sha_len_hi;
+};
+
+struct dword7 {
+	unsigned int  key_hi:16;
+	unsigned int  key_mem:2;
+	unsigned int  rsvd1:14;
+};
+
+struct ccp5_desc {
+	struct dword0 dw0;
+	__le32 length;
+	__le32 src_lo;
+	struct dword3 dw3;
+	union dword4 dw4;
+	union dword5 dw5;
+	__le32 key_lo;
+	struct dword7 dw7;
+};
+
+void ccp_add_device(struct ccp_device *ccp);
+void ccp_del_device(struct ccp_device *ccp);
+
+extern void ccp_log_error(struct ccp_device *, unsigned int);
+
+struct ccp_device *ccp_alloc_struct(struct sp_device *sp);
+bool ccp_queues_suspended(struct ccp_device *ccp);
+int ccp_cmd_queue_thread(void *data);
+int ccp_trng_read(struct hwrng *rng, void *data, size_t max, bool wait);
+
+int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd);
+
+int ccp_register_rng(struct ccp_device *ccp);
+void ccp_unregister_rng(struct ccp_device *ccp);
+int ccp_dmaengine_register(struct ccp_device *ccp);
+void ccp_dmaengine_unregister(struct ccp_device *ccp);
+
+void ccp5_debugfs_setup(struct ccp_device *ccp);
+void ccp5_debugfs_destroy(void);
+
+/* Structure for computation functions that are device-specific */
+struct ccp_actions {
+	int (*aes)(struct ccp_op *);
+	int (*xts_aes)(struct ccp_op *);
+	int (*des3)(struct ccp_op *);
+	int (*sha)(struct ccp_op *);
+	int (*rsa)(struct ccp_op *);
+	int (*passthru)(struct ccp_op *);
+	int (*ecc)(struct ccp_op *);
+	u32 (*sballoc)(struct ccp_cmd_queue *, unsigned int);
+	void (*sbfree)(struct ccp_cmd_queue *, unsigned int, unsigned int);
+	unsigned int (*get_free_slots)(struct ccp_cmd_queue *);
+	int (*init)(struct ccp_device *);
+	void (*destroy)(struct ccp_device *);
+	irqreturn_t (*irqhandler)(int, void *);
+};
+
+extern const struct ccp_vdata ccpv3_platform;
+extern const struct ccp_vdata ccpv3;
+extern const struct ccp_vdata ccpv5a;
+extern const struct ccp_vdata ccpv5b;
+
+#endif
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
new file mode 100644
index 000000000..8209273eb
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -0,0 +1,767 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/dmaengine.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+#include "../../dma/dmaengine.h"
+
+#define CCP_DMA_WIDTH(_mask)		\
+({					\
+	u64 mask = (_mask) + 1;	/* an all-ones mask wraps to 0 */	\
+	(mask == 0) ? 64 : fls64(mask);	\
+})
+
+/* The CCP as a DMA provider can be configured for public or private
+ * channels. Default is specified in the vdata for the device (PCI ID).
+ * This module parameter will override for all channels on all devices:
+ *   dma_chan_attr = 0x2 to force all channels public
+ *                 = 0x1 to force all channels private
+ *                 = 0x0 to defer to the vdata setting
+ *                 = any other value: logged once, treated as 0x0
+ */
+static unsigned int dma_chan_attr = CCP_DMA_DFLT;
+module_param(dma_chan_attr, uint, 0444);
+MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public");
+
+/* Pick channel visibility: the dma_chan_attr module parameter wins unless it
+ * is CCP_DMA_DFLT or unrecognised, in which case the vdata value is used.
+ */
+static unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp)
+{
+	switch (dma_chan_attr) {
+	case CCP_DMA_DFLT:
+		return ccp->vdata->dma_chan_attr;
+	case CCP_DMA_PRIV:
+		return DMA_PRIVATE;
+	case CCP_DMA_PUB:
+		return 0;
+	default:
+		dev_info_once(ccp->dev, "Invalid value for dma_chan_attr: %u\n",
+			      dma_chan_attr);
+		return ccp->vdata->dma_chan_attr;
+	}
+}
+
+/* Unlink each DMA command on @list and hand it back to the cmd cache */
+static void ccp_free_cmd_resources(struct ccp_device *ccp, struct list_head *list)
+{
+	struct ccp_dma_cmd *cur;
+
+	while ((cur = list_first_entry_or_null(list, struct ccp_dma_cmd, entry))) {
+		list_del(&cur->entry);
+		kmem_cache_free(ccp->dma_cmd_cache, cur);
+	}
+}
+
+/* Drain @list: free each descriptor's active and pending commands, then
+ * unlink the descriptor and return it to the desc cache. */
+static void ccp_free_desc_resources(struct ccp_device *ccp, struct list_head *list)
+{
+	struct ccp_dma_desc *cur;
+
+	while ((cur = list_first_entry_or_null(list, struct ccp_dma_desc, entry))) {
+		ccp_free_cmd_resources(ccp, &cur->active);
+		ccp_free_cmd_resources(ccp, &cur->pending);
+		list_del(&cur->entry);
+		kmem_cache_free(ccp->dma_desc_cache, cur);
+	}
+}
+
+/* dmaengine free_chan_resources hook: with the channel lock held, free every
+ * descriptor on the complete, active, pending and created lists. */
+static void ccp_free_chan_resources(struct dma_chan *dma_chan)
+{
+	struct ccp_dma_chan *chan;
+	unsigned long irqflags;
+
+	chan = container_of(dma_chan, struct ccp_dma_chan, dma_chan);
+	dev_dbg(chan->ccp->dev, "%s - chan=%p\n", __func__, chan);
+
+	spin_lock_irqsave(&chan->lock, irqflags);
+	ccp_free_desc_resources(chan->ccp, &chan->complete);
+	ccp_free_desc_resources(chan->ccp, &chan->active);
+	ccp_free_desc_resources(chan->ccp, &chan->pending);
+	ccp_free_desc_resources(chan->ccp, &chan->created);
+	spin_unlock_irqrestore(&chan->lock, irqflags);
+}
+
+/* Walk @list from the tail and release every descriptor whose tx descriptor
+ * has been acked; un-acked descriptors stay on the list. */
+static void ccp_cleanup_desc_resources(struct ccp_device *ccp,
+				       struct list_head *list)
+{
+	struct ccp_dma_desc *cur, *prev;
+
+	list_for_each_entry_safe_reverse(cur, prev, list, entry) {
+		if (async_tx_test_ack(&cur->tx_desc)) {
+			dev_dbg(ccp->dev, "%s - desc=%p\n", __func__, cur);
+
+			ccp_free_cmd_resources(ccp, &cur->active);
+			ccp_free_cmd_resources(ccp, &cur->pending);
+			list_del(&cur->entry);
+			kmem_cache_free(ccp->dma_desc_cache, cur);
+		}
+	}
+}
+
+/* Cleanup tasklet body: @data is the ccp_dma_chan; reap acked descriptors
+ * from its complete list while holding the channel lock. */
+static void ccp_do_cleanup(unsigned long data)
+{
+	struct ccp_dma_chan *chan = (struct ccp_dma_chan *)data;
+	struct ccp_device *ccp = chan->ccp;
+	unsigned long irqflags;
+
+	dev_dbg(ccp->dev, "%s - chan=%s\n", __func__, dma_chan_name(&chan->dma_chan));
+
+	spin_lock_irqsave(&chan->lock, irqflags);
+	ccp_cleanup_desc_resources(ccp, &chan->complete);
+	spin_unlock_irqrestore(&chan->lock, irqflags);
+}
+
+/* Move the first pending command of @desc to its active list and enqueue it;
+ * 0, -EINPROGRESS and -EBUSY all yield 0, other codes are logged and returned. */
+static int ccp_issue_next_cmd(struct ccp_dma_desc *desc)
+{
+	struct ccp_dma_cmd *next;
+	int rc;
+
+	next = list_first_entry(&desc->pending, struct ccp_dma_cmd, entry);
+	list_move(&next->entry, &desc->active);
+	dev_dbg(desc->ccp->dev, "%s - tx %d, cmd=%p\n", __func__,
+		desc->tx_desc.cookie, next);
+
+	rc = ccp_enqueue_cmd(&next->ccp_cmd);
+	if (rc && rc != -EINPROGRESS && rc != -EBUSY) {
+		dev_dbg(desc->ccp->dev, "%s - error: ret=%d, tx %d, cmd=%p\n",
+			__func__, rc, desc->tx_desc.cookie, next);
+		return rc;
+	}
+	return 0;
+}
+
+/* Free the first command on @desc's active list; no-op if that list is empty */
+static void ccp_free_active_cmd(struct ccp_dma_desc *desc)
+{
+	struct ccp_device *ccp = desc->ccp;
+	struct ccp_dma_cmd *done;
+
+	if (list_empty(&desc->active))
+		return;
+
+	done = list_first_entry(&desc->active, struct ccp_dma_cmd, entry);
+	dev_dbg(ccp->dev, "%s - freeing tx %d cmd=%p\n",
+		__func__, desc->tx_desc.cookie, done);
+	list_del(&done->entry);
+	kmem_cache_free(ccp->dma_cmd_cache, done);
+}
+
+/* Retire @desc (when non-NULL) onto the channel's complete list and return
+ * the head of the active list. The caller visible in this file invokes it
+ * with chan->lock held.
+ */
+static struct ccp_dma_desc *__ccp_next_dma_desc(struct ccp_dma_chan *chan,
+						struct ccp_dma_desc *desc)
+{
+	if (desc)
+		list_move(&desc->entry, &chan->complete);
+
+	/* NULL when the active list is empty */
+	return list_first_entry_or_null(&chan->active, struct ccp_dma_desc, entry);
+}
+
+static struct ccp_dma_desc *ccp_handle_active_desc(struct ccp_dma_chan *chan,
+						   struct ccp_dma_desc *desc)
+{
+	struct dma_async_tx_descriptor *tx_desc;
+	unsigned long flags;
+	/* Finish the current command of @desc and return the descriptor to issue
+	 * from next; loops over descriptors until one is found with commands. */
+	do {
+		if (desc) {
+			/* Remove the DMA command from the list and free it */
+			ccp_free_active_cmd(desc);
+
+			if (!list_empty(&desc->pending)) {
+				/* No errors, keep going */
+				if (desc->status != DMA_ERROR)
+					return desc;
+
+				/* Error, free remaining commands and move on */
+				ccp_free_cmd_resources(desc->ccp,
+						       &desc->pending);
+			}
+
+			tx_desc = &desc->tx_desc;
+		} else {
+			tx_desc = NULL;	/* nothing to complete on this pass */
+		}
+
+		spin_lock_irqsave(&chan->lock, flags);
+
+		if (desc) {
+			if (desc->status != DMA_ERROR)
+				desc->status = DMA_COMPLETE;
+
+			dev_dbg(desc->ccp->dev,
+				"%s - tx %d complete, status=%u\n", __func__,
+				desc->tx_desc.cookie, desc->status);
+
+			dma_cookie_complete(tx_desc);
+			dma_descriptor_unmap(tx_desc);
+		}
+
+		desc = __ccp_next_dma_desc(chan, desc);	/* retire desc, fetch next */
+
+		spin_unlock_irqrestore(&chan->lock, flags);
+
+		if (tx_desc) {	/* client callback runs after chan->lock is dropped */
+			dmaengine_desc_get_callback_invoke(tx_desc, NULL);
+
+			dma_run_dependencies(tx_desc);
+		}
+	} while (desc);
+
+	return NULL;	/* active list exhausted */
+}
+
+/* Append all pending descriptors to the active list. Returns the first of
+ * them only if the active list was empty beforehand; otherwise NULL. */
+static struct ccp_dma_desc *__ccp_pending_to_active(struct ccp_dma_chan *chan)
+{
+	struct ccp_dma_desc *first = NULL;
+
+	if (list_empty(&chan->pending))
+		return NULL;
+
+	if (list_empty(&chan->active))
+		first = list_first_entry(&chan->pending, struct ccp_dma_desc, entry);
+	list_splice_tail_init(&chan->pending, &chan->active);
+
+	return first;
+}
+
+static void ccp_cmd_callback(void *data, int err)
+{
+	struct ccp_dma_desc *desc = data;	/* stored as ccp_cmd->data at creation */
+	struct ccp_dma_chan *chan;
+	int ret;
+	/* -EINPROGRESS is ignored: presumably a progress notice, not completion */
+	if (err == -EINPROGRESS)
+		return;
+
+	chan = container_of(desc->tx_desc.chan, struct ccp_dma_chan,
+			    dma_chan);
+
+	dev_dbg(chan->ccp->dev, "%s - tx %d callback, err=%d\n",
+		__func__, desc->tx_desc.cookie, err);
+
+	if (err)
+		desc->status = DMA_ERROR;
+
+	while (true) {
+		/* Check for DMA descriptor completion */
+		desc = ccp_handle_active_desc(chan, desc);
+
+		/* Don't submit cmd if no descriptor or DMA is paused */
+		if (!desc || (chan->status == DMA_PAUSED))
+			break;
+
+		ret = ccp_issue_next_cmd(desc);
+		if (!ret)
+			break;
+
+		desc->status = DMA_ERROR;	/* submit failed: loop to retire desc */
+	}
+
+	tasklet_schedule(&chan->cleanup_tasklet);	/* reaps acked descriptors */
+}
+
+/* tx_submit hook: assign a cookie to @tx_desc and move its descriptor to the
+ * tail of the channel's pending list, all under chan->lock. Returns the cookie.
+ */
+static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc)
+{
+	struct ccp_dma_desc *desc;
+	struct ccp_dma_chan *chan;
+	dma_cookie_t id;
+	unsigned long irqflags;
+
+	desc = container_of(tx_desc, struct ccp_dma_desc, tx_desc);
+	chan = container_of(tx_desc->chan, struct ccp_dma_chan, dma_chan);
+
+	spin_lock_irqsave(&chan->lock, irqflags);
+	id = dma_cookie_assign(tx_desc);
+	list_move_tail(&desc->entry, &chan->pending);
+	spin_unlock_irqrestore(&chan->lock, irqflags);
+
+	dev_dbg(chan->ccp->dev, "%s - added tx descriptor %d to pending list\n",
+		__func__, id);
+
+	return id;
+}
+
+/* Allocate one zero-filled DMA command from the device's cmd cache.
+ * GFP_NOWAIT is kept, so this can fail and return NULL. kmem_cache_zalloc()
+ * replaces the open-coded alloc + memset, matching ccp_alloc_dma_desc().
+ */
+static struct ccp_dma_cmd *ccp_alloc_dma_cmd(struct ccp_dma_chan *chan)
+{
+	struct ccp_device *ccp = chan->ccp;
+
+	return kmem_cache_zalloc(ccp->dma_cmd_cache, GFP_NOWAIT);
+}
+
+/* Allocate a zeroed descriptor (GFP_NOWAIT, may return NULL), initialise its
+ * tx descriptor with @flags and ccp_tx_submit, and start it as IN_PROGRESS. */
+static struct ccp_dma_desc *ccp_alloc_dma_desc(struct ccp_dma_chan *chan,
+					       unsigned long flags)
+{
+	struct ccp_dma_desc *fresh;
+
+	fresh = kmem_cache_zalloc(chan->ccp->dma_desc_cache, GFP_NOWAIT);
+	if (fresh) {
+		fresh->ccp = chan->ccp;
+		fresh->status = DMA_IN_PROGRESS;
+		INIT_LIST_HEAD(&fresh->entry);
+		INIT_LIST_HEAD(&fresh->pending);
+		INIT_LIST_HEAD(&fresh->active);
+		dma_async_tx_descriptor_init(&fresh->tx_desc, &chan->dma_chan);
+		fresh->tx_desc.flags = flags;
+		fresh->tx_desc.tx_submit = ccp_tx_submit;
+	}
+	return fresh;
+}
+
+static struct ccp_dma_desc *ccp_create_desc(struct dma_chan *dma_chan,
+					    struct scatterlist *dst_sg,
+					    unsigned int dst_nents,
+					    struct scatterlist *src_sg,
+					    unsigned int src_nents,
+					    unsigned long flags)
+{
+	struct ccp_dma_chan *chan = container_of(dma_chan, struct ccp_dma_chan,
+						 dma_chan);
+	struct ccp_device *ccp = chan->ccp;
+	struct ccp_dma_desc *desc;
+	struct ccp_dma_cmd *cmd;
+	struct ccp_cmd *ccp_cmd;
+	struct ccp_passthru_nomap_engine *ccp_pt;
+	unsigned int src_offset, src_len;
+	unsigned int dst_offset, dst_len;
+	unsigned int len;
+	unsigned long sflags;
+	size_t total_len;
+	/* Builds one passthru command per src/dst overlap; returns NULL on error */
+	if (!dst_sg || !src_sg)
+		return NULL;
+
+	if (!dst_nents || !src_nents)
+		return NULL;
+
+	desc = ccp_alloc_dma_desc(chan, flags);
+	if (!desc)
+		return NULL;
+
+	total_len = 0;
+
+	src_len = sg_dma_len(src_sg);
+	src_offset = 0;
+
+	dst_len = sg_dma_len(dst_sg);
+	dst_offset = 0;
+
+	while (true) {
+		if (!src_len) {	/* current source entry used up: advance */
+			src_nents--;
+			if (!src_nents)
+				break;
+
+			src_sg = sg_next(src_sg);
+			if (!src_sg)
+				break;
+
+			src_len = sg_dma_len(src_sg);
+			src_offset = 0;
+			continue;
+		}
+
+		if (!dst_len) {	/* current destination entry used up: advance */
+			dst_nents--;
+			if (!dst_nents)
+				break;
+
+			dst_sg = sg_next(dst_sg);
+			if (!dst_sg)
+				break;
+
+			dst_len = sg_dma_len(dst_sg);
+			dst_offset = 0;
+			continue;
+		}
+
+		len = min(dst_len, src_len);	/* largest span both entries cover */
+
+		cmd = ccp_alloc_dma_cmd(chan);
+		if (!cmd)
+			goto err;
+
+		ccp_cmd = &cmd->ccp_cmd;
+		ccp_cmd->ccp = chan->ccp;
+		ccp_pt = &ccp_cmd->u.passthru_nomap;
+		ccp_cmd->flags = CCP_CMD_MAY_BACKLOG;
+		ccp_cmd->flags |= CCP_CMD_PASSTHRU_NO_DMA_MAP;
+		ccp_cmd->engine = CCP_ENGINE_PASSTHRU;
+		ccp_pt->bit_mod = CCP_PASSTHRU_BITWISE_NOOP;
+		ccp_pt->byte_swap = CCP_PASSTHRU_BYTESWAP_NOOP;
+		ccp_pt->src_dma = sg_dma_address(src_sg) + src_offset;
+		ccp_pt->dst_dma = sg_dma_address(dst_sg) + dst_offset;
+		ccp_pt->src_len = len;
+		ccp_pt->final = 1;
+		ccp_cmd->callback = ccp_cmd_callback;
+		ccp_cmd->data = desc;	/* handed back to ccp_cmd_callback() as @data */
+
+		list_add_tail(&cmd->entry, &desc->pending);
+
+		dev_dbg(ccp->dev,
+			"%s - cmd=%p, src=%pad, dst=%pad, len=%llu\n", __func__,
+			cmd, &ccp_pt->src_dma,
+			&ccp_pt->dst_dma, ccp_pt->src_len);
+
+		total_len += len;
+
+		src_len -= len;
+		src_offset += len;
+
+		dst_len -= len;
+		dst_offset += len;
+	}
+
+	desc->len = total_len;
+
+	if (list_empty(&desc->pending))	/* no command was generated */
+		goto err;
+
+	dev_dbg(ccp->dev, "%s - desc=%p\n", __func__, desc);
+
+	spin_lock_irqsave(&chan->lock, sflags);
+
+	list_add_tail(&desc->entry, &chan->created);
+
+	spin_unlock_irqrestore(&chan->lock, sflags);
+
+	return desc;
+
+err:	/* desc was never put on a channel list, so no locking is taken here */
+	ccp_free_cmd_resources(ccp, &desc->pending);
+	kmem_cache_free(ccp->dma_desc_cache, desc);
+
+	return NULL;
+}
+
+/* prep_dma_memcpy hook: wrap @dst and @src in single-entry scatterlists of
+ * @len bytes and build a descriptor from them. Returns NULL on failure. */
+static struct dma_async_tx_descriptor *ccp_prep_dma_memcpy(
+	struct dma_chan *dma_chan, dma_addr_t dst, dma_addr_t src, size_t len,
+	unsigned long flags)
+{
+	struct scatterlist src_sg, dst_sg;
+	struct ccp_dma_desc *desc;
+	struct ccp_dma_chan *chan;
+
+	chan = container_of(dma_chan, struct ccp_dma_chan, dma_chan);
+	dev_dbg(chan->ccp->dev,
+		"%s - src=%pad, dst=%pad, len=%zu, flags=%#lx\n",
+		__func__, &src, &dst, len, flags);
+
+	sg_init_table(&dst_sg, 1);
+	sg_dma_address(&dst_sg) = dst;
+	sg_dma_len(&dst_sg) = len;
+
+	sg_init_table(&src_sg, 1);
+	sg_dma_address(&src_sg) = src;
+	sg_dma_len(&src_sg) = len;
+
+	desc = ccp_create_desc(dma_chan, &dst_sg, 1, &src_sg, 1, flags);
+
+	return desc ? &desc->tx_desc : NULL;
+}
+
+/* prep_dma_interrupt hook: allocate a descriptor that carries no commands.
+ * Returns its tx descriptor, or NULL if the allocation fails. */
+static struct dma_async_tx_descriptor *ccp_prep_dma_interrupt(
+	struct dma_chan *dma_chan, unsigned long flags)
+{
+	struct ccp_dma_chan *chan;
+	struct ccp_dma_desc *desc;
+
+	chan = container_of(dma_chan, struct ccp_dma_chan, dma_chan);
+	desc = ccp_alloc_dma_desc(chan, flags);
+
+	return desc ? &desc->tx_desc : NULL;
+}
+
+/* issue_pending hook: splice pending descriptors onto the active list and,
+ * if nothing was active before, kick processing via ccp_cmd_callback(). */
+static void ccp_issue_pending(struct dma_chan *dma_chan)
+{
+	struct ccp_dma_chan *chan;
+	struct ccp_dma_desc *first;
+	unsigned long irqflags;
+
+	chan = container_of(dma_chan, struct ccp_dma_chan, dma_chan);
+	dev_dbg(chan->ccp->dev, "%s\n", __func__);
+
+	spin_lock_irqsave(&chan->lock, irqflags);
+	first = __ccp_pending_to_active(chan);
+	spin_unlock_irqrestore(&chan->lock, irqflags);
+
+	/* Non-NULL only when the active list had been empty */
+	if (first)
+		ccp_cmd_callback(first, 0);
+}
+
+/* tx_status hook. A paused channel reports DMA_PAUSED without consulting the
+ * cookie. Otherwise the cookie status is used, replaced by the descriptor's
+ * own status when a completed descriptor is still on the complete list.
+ */
+static enum dma_status ccp_tx_status(struct dma_chan *dma_chan,
+				     dma_cookie_t cookie,
+				     struct dma_tx_state *state)
+{
+	struct ccp_dma_chan *chan;
+	struct ccp_dma_desc *cur;
+	enum dma_status status = DMA_PAUSED;
+	unsigned long irqflags;
+
+	chan = container_of(dma_chan, struct ccp_dma_chan, dma_chan);
+
+	if (chan->status != DMA_PAUSED) {
+		status = dma_cookie_status(dma_chan, cookie, state);
+		if (status == DMA_COMPLETE) {
+			spin_lock_irqsave(&chan->lock, irqflags);
+
+			/* Prefer the status recorded on the descriptor */
+			list_for_each_entry(cur, &chan->complete, entry) {
+				if (cur->tx_desc.cookie == cookie) {
+					status = cur->status;
+					break;
+				}
+			}
+
+			spin_unlock_irqrestore(&chan->lock, irqflags);
+		}
+	}
+
+	dev_dbg(chan->ccp->dev, "%s - %u\n", __func__, status);
+
+	return status;
+}
+
+/* device_pause hook: flag the channel as paused; always returns 0 */
+static int ccp_pause(struct dma_chan *dma_chan)
+{
+	struct ccp_dma_chan *chan;
+
+	chan = container_of(dma_chan, struct ccp_dma_chan, dma_chan);
+	chan->status = DMA_PAUSED;
+
+	/* TODO: Wait for active DMA to complete before returning? */
+	return 0;
+}
+
+/* device_resume hook: mark the channel running again and, if a descriptor is
+ * at the head of the active list, re-enter ccp_cmd_callback() for it.
+ * Always returns 0.
+ */
+static int ccp_resume(struct dma_chan *dma_chan)
+{
+	struct ccp_dma_chan *chan;
+	struct ccp_dma_desc *head;
+	unsigned long irqflags;
+
+	chan = container_of(dma_chan, struct ccp_dma_chan, dma_chan);
+
+	/* Sample the head of the active list under the lock */
+	spin_lock_irqsave(&chan->lock, irqflags);
+	head = list_first_entry_or_null(&chan->active, struct ccp_dma_desc, entry);
+	spin_unlock_irqrestore(&chan->lock, irqflags);
+
+	chan->status = DMA_IN_PROGRESS;
+	if (head)
+		ccp_cmd_callback(head, 0);
+
+	return 0;
+}
+
+/* device_terminate_all hook: under the channel lock, free every descriptor on
+ * the active, pending and created lists (not the complete list). Returns 0.
+ */
+static int ccp_terminate_all(struct dma_chan *dma_chan)
+{
+	struct ccp_dma_chan *chan;
+	unsigned long irqflags;
+
+	chan = container_of(dma_chan, struct ccp_dma_chan, dma_chan);
+	dev_dbg(chan->ccp->dev, "%s\n", __func__);
+
+	/* TODO: Wait for active DMA to complete before continuing */
+	spin_lock_irqsave(&chan->lock, irqflags);
+	/* TODO: Purge the complete list? */
+	ccp_free_desc_resources(chan->ccp, &chan->active);
+	ccp_free_desc_resources(chan->ccp, &chan->pending);
+	ccp_free_desc_resources(chan->ccp, &chan->created);
+	spin_unlock_irqrestore(&chan->lock, irqflags);
+
+	return 0;
+}
+
+/* For each per-queue channel: kill its cleanup tasklet and unlink the channel
+ * from the dma_device channel list. */
+static void ccp_dma_release(struct ccp_device *ccp)
+{
+	unsigned int idx;
+
+	for (idx = 0; idx < ccp->cmd_q_count; idx++) {
+		struct ccp_dma_chan *chan = &ccp->ccp_dma_chan[idx];
+
+		tasklet_kill(&chan->cleanup_tasklet);
+		list_del_rcu(&chan->dma_chan.device_node);
+	}
+}
+
+int ccp_dmaengine_register(struct ccp_device *ccp)
+{
+	struct ccp_dma_chan *chan;
+	struct dma_device *dma_dev = &ccp->dma_dev;
+	struct dma_chan *dma_chan;
+	char *dma_cmd_cache_name;
+	char *dma_desc_cache_name;
+	unsigned int i;
+	int ret;
+	/* One DMA channel per command queue; 0 on success, else an error code */
+	ccp->ccp_dma_chan = devm_kcalloc(ccp->dev, ccp->cmd_q_count,
+					 sizeof(*(ccp->ccp_dma_chan)),
+					 GFP_KERNEL);
+	if (!ccp->ccp_dma_chan)
+		return -ENOMEM;
+
+	dma_cmd_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL,
+					    "%s-dmaengine-cmd-cache",
+					    ccp->name);
+	if (!dma_cmd_cache_name)
+		return -ENOMEM;
+
+	ccp->dma_cmd_cache = kmem_cache_create(dma_cmd_cache_name,
+					       sizeof(struct ccp_dma_cmd),
+					       sizeof(void *),
+					       SLAB_HWCACHE_ALIGN, NULL);
+	if (!ccp->dma_cmd_cache)
+		return -ENOMEM;
+
+	dma_desc_cache_name = devm_kasprintf(ccp->dev, GFP_KERNEL,
+					     "%s-dmaengine-desc-cache",
+					     ccp->name);
+	if (!dma_desc_cache_name) {
+		ret = -ENOMEM;
+		goto err_cache;
+	}
+
+	ccp->dma_desc_cache = kmem_cache_create(dma_desc_cache_name,
+						sizeof(struct ccp_dma_desc),
+						sizeof(void *),
+						SLAB_HWCACHE_ALIGN, NULL);
+	if (!ccp->dma_desc_cache) {
+		ret = -ENOMEM;
+		goto err_cache;
+	}
+
+	dma_dev->dev = ccp->dev;
+	dma_dev->src_addr_widths = CCP_DMA_WIDTH(dma_get_mask(ccp->dev));
+	dma_dev->dst_addr_widths = CCP_DMA_WIDTH(dma_get_mask(ccp->dev));
+	dma_dev->directions = DMA_MEM_TO_MEM;
+	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+	dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+
+	/* The DMA channels for this device can be set to public or private,
+	 * and overridden by the module parameter dma_chan_attr.
+	 * Default: according to the value in vdata (dma_chan_attr=0)
+	 * dma_chan_attr=0x1: all channels private (override vdata)
+	 * dma_chan_attr=0x2: all channels public (override vdata)
+	 */
+	if (ccp_get_dma_chan_attr(ccp) == DMA_PRIVATE)
+		dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+	for (i = 0; i < ccp->cmd_q_count; i++) {
+		chan = ccp->ccp_dma_chan + i;
+		dma_chan = &chan->dma_chan;
+
+		chan->ccp = ccp;
+
+		spin_lock_init(&chan->lock);
+		INIT_LIST_HEAD(&chan->created);
+		INIT_LIST_HEAD(&chan->pending);
+		INIT_LIST_HEAD(&chan->active);
+		INIT_LIST_HEAD(&chan->complete);
+
+		tasklet_init(&chan->cleanup_tasklet, ccp_do_cleanup,
+			     (unsigned long)chan);	/* chan becomes the tasklet's @data */
+
+		dma_chan->device = dma_dev;
+		dma_cookie_init(dma_chan);
+
+		list_add_tail(&dma_chan->device_node, &dma_dev->channels);
+	}
+
+	dma_dev->device_free_chan_resources = ccp_free_chan_resources;
+	dma_dev->device_prep_dma_memcpy = ccp_prep_dma_memcpy;
+	dma_dev->device_prep_dma_interrupt = ccp_prep_dma_interrupt;
+	dma_dev->device_issue_pending = ccp_issue_pending;
+	dma_dev->device_tx_status = ccp_tx_status;
+	dma_dev->device_pause = ccp_pause;
+	dma_dev->device_resume = ccp_resume;
+	dma_dev->device_terminate_all = ccp_terminate_all;
+
+	ret = dma_async_device_register(dma_dev);
+	if (ret)
+		goto err_reg;
+
+	return 0;
+
+err_reg:	/* channels and both caches exist at this point */
+	ccp_dma_release(ccp);
+	kmem_cache_destroy(ccp->dma_desc_cache);
+
+err_cache:	/* only the cmd cache exists; devm_* allocations are not freed here */
+	kmem_cache_destroy(ccp->dma_cmd_cache);
+
+	return ret;
+}
+
+/* Undo ccp_dmaengine_register(): unregister the dma_device, release the
+ * channels, then destroy the desc and cmd caches (in that order).
+ */
+void ccp_dmaengine_unregister(struct ccp_device *ccp)
+{
+	dma_async_device_unregister(&ccp->dma_dev);
+	ccp_dma_release(ccp);
+	kmem_cache_destroy(ccp->dma_desc_cache);
+	kmem_cache_destroy(ccp->dma_cmd_cache);
+}
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
new file mode 100644
index 000000000..453d27d2a
--- /dev/null
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -0,0 +1,2520 @@
+/*
+ * AMD Cryptographic Coprocessor (CCP) driver
+ *
+ * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/des.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+
+/* SHA initial context values, stored as big-endian words (__be32 / __be64) */
+static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
+	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
+	cpu_to_be32(SHA1_H4),
+};
+
+static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),	/* sized by SHA256_DIGEST_SIZE */
+	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
+	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
+	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
+};
+
+static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
+	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
+	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
+	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
+	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
+};
+
+static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
+	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),	/* sized by SHA512_DIGEST_SIZE */
+	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
+	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
+	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
+};
+
+static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
+	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
+	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
+	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
+	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
+};
+
+#define	CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
+					ccp_gen_jobid(ccp) : 0)	/* 0 unless version 3.0 */
+
+static u32 ccp_gen_jobid(struct ccp_device *ccp)
+{
+	return CCP_JOBID_MASK & atomic_inc_return(&ccp->current_id); /* masked counter */
+}
+
+/* Unmap the scatterlist if it was DMA-mapped, then clear the mapped count */
+static void ccp_sg_free(struct ccp_sg_workarea *wa)
+{
+	if (wa->dma_count != 0)
+		dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir);
+	wa->dma_count = 0;
+}
+
+/* Prepare @wa to walk @sg for @len bytes and, unless @len is 0 or @dma_dir is
+ * DMA_NONE, DMA-map the entries covering @len. A NULL @sg leaves @wa zeroed.
+ * Returns 0, a negative sg_nents_for_len() result, or -ENOMEM if mapping fails.
+ */
+static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
+				struct scatterlist *sg, u64 len,
+				enum dma_data_direction dma_dir)
+{
+	memset(wa, 0, sizeof(*wa));
+
+	wa->sg = sg;
+	if (!sg)
+		return 0;
+
+	wa->nents = sg_nents_for_len(sg, len);
+	if (wa->nents < 0)
+		return wa->nents;
+
+	wa->bytes_left = len;
+	wa->sg_used = 0;
+
+	/* Nothing to map for an empty transfer or when no DMA direction is given */
+	if (!len || dma_dir == DMA_NONE)
+		return 0;
+
+	wa->dma_dir = dma_dir;
+	wa->dma_dev = dev;
+	wa->dma_sg_head = sg;
+	wa->dma_sg = sg;
+	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
+
+	return wa->dma_count ? 0 : -ENOMEM;
+}
+
+static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
+{
+	unsigned int nbytes = min_t(u64, len, wa->bytes_left);	/* capped at what remains */
+	unsigned int sg_combined_len = 0;
+	/* Consume nbytes; step both sg cursors once the DMA entry is used up */
+	if (!wa->sg)
+		return;
+
+	wa->sg_used += nbytes;
+	wa->bytes_left -= nbytes;
+	if (wa->sg_used == sg_dma_len(wa->dma_sg)) {
+		/* Advance to the next DMA scatterlist entry */
+		wa->dma_sg = sg_next(wa->dma_sg);
+
+		/* In the case that the DMA mapped scatterlist has entries
+		 * that have been merged, the non-DMA mapped scatterlist
+		 * must be advanced multiple times for each merged entry.
+		 * This ensures that the current non-DMA mapped entry
+		 * corresponds to the current DMA mapped entry.
+		 */
+		do {
+			sg_combined_len += wa->sg->length;
+			wa->sg = sg_next(wa->sg);
+		} while (wa->sg_used > sg_combined_len);
+
+		wa->sg_used = 0;	/* offset restarts within the new entry */
+	}
+}
+
+/* Release a dm workarea: small buffers go back to the DMA pool, larger ones
+ * are unmapped (if mapped) and kfree'd. Both address fields are then cleared. */
+static void ccp_dm_free(struct ccp_dm_workarea *wa)
+{
+	bool pooled = wa->length <= CCP_DMAPOOL_MAX_SIZE;
+
+	if (pooled && wa->address) {
+		dma_pool_free(wa->dma_pool, wa->address, wa->dma.address);
+	} else if (!pooled) {
+		if (wa->dma.address)
+			dma_unmap_single(wa->dev, wa->dma.address, wa->length, wa->dma.dir);
+		kfree(wa->address);
+	}
+	wa->address = NULL;
+	wa->dma.address = 0;
+}
+
+/* Set up a DMA-able buffer of @len bytes in @wa (left zeroed when @len is 0):
+ * pool-backed up to CCP_DMAPOOL_MAX_SIZE, else kzalloc'd and mapped with @dir.
+ * Returns 0 or -ENOMEM; a failed mapping frees the buffer and clears @wa. */
+static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
+				struct ccp_cmd_queue *cmd_q, unsigned int len,
+				enum dma_data_direction dir)
+{
+	memset(wa, 0, sizeof(*wa));
+
+	if (!len)
+		return 0;
+
+	wa->dev = cmd_q->ccp->dev;
+	wa->length = len;
+
+	if (len <= CCP_DMAPOOL_MAX_SIZE) {
+		wa->dma_pool = cmd_q->dma_pool;
+		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL, &wa->dma.address);
+		if (!wa->address)
+			return -ENOMEM;
+		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
+		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
+	} else {
+		wa->address = kzalloc(len, GFP_KERNEL);
+		if (!wa->address)
+			return -ENOMEM;
+		wa->dma.address = dma_map_single(wa->dev, wa->address, len, dir);
+		if (dma_mapping_error(wa->dev, wa->dma.address)) {
+			/* Leave nothing for ccp_dm_free() to unmap or free */
+			kfree(wa->address);
+			wa->address = NULL;
+			wa->dma.address = 0;
+			return -ENOMEM;
+		}
+		wa->dma.length = len;
+	}
+	wa->dma.dir = dir;
+
+	return 0;
+}
+
+/* Copy @len bytes from @sg (at @sg_offset) into the workarea at @wa_offset.
+ * Returns -EINVAL if offset or length fall outside the workarea, else 0. */
+static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
+			   struct scatterlist *sg, unsigned int sg_offset, unsigned int len)
+{
+	WARN_ON(!wa->address);
+
+	if (wa_offset > wa->length || len > (wa->length - wa_offset))
+		return -EINVAL;
+
+	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len, 0);
+	return 0;
+}
+
+/* Copy @len bytes out of the workarea (from @wa_offset) into @sg at @sg_offset */
+static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
+			    struct scatterlist *sg, unsigned int sg_offset, unsigned int len)
+{
+	void *from = wa->address + wa_offset;
+
+	WARN_ON(!wa->address);
+	scatterwalk_map_and_copy(from, sg, sg_offset, len, 1);
+}
+
+/* Copy @len bytes from @sg into the workarea at @wa_offset, then reverse the
+ * byte order of that region in place. Returns ccp_set_dm_area()'s error, or 0.
+ */
+static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
+				   unsigned int wa_offset,
+				   struct scatterlist *sg,
+				   unsigned int sg_offset,
+				   unsigned int len)
+{
+	u8 *lo, *hi, tmp;
+	int err;
+
+	err = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
+	if (err)
+		return err;
+
+	lo = wa->address + wa_offset;
+	for (hi = lo + len - 1; lo < hi; lo++, hi--) {
+		tmp = *lo;
+		*lo = *hi;
+		*hi = tmp;
+	}
+	return 0;
+}
+
+/* Reverse, in place, the @len bytes of the workarea at @wa_offset, then copy
+ * them out to @sg at @sg_offset. The workarea is left in reversed order.
+ */
+static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
+				    unsigned int wa_offset,
+				    struct scatterlist *sg,
+				    unsigned int sg_offset,
+				    unsigned int len)
+{
+	u8 *lo = wa->address + wa_offset;
+	u8 *hi = lo + len - 1;
+
+	for (; lo < hi; lo++, hi--) {
+		u8 tmp = *lo;
+		*lo = *hi;
+		*hi = tmp;
+	}
+
+	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
+}
+
+static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
+{
+	ccp_dm_free(&data->dm_wa);	/* release the dm buffer workarea */
+	ccp_sg_free(&data->sg_wa);	/* unmap the sg workarea; @cmd_q is not used */
+}
+
+/* Initialise both workareas of @data: the scatterlist walker over @sg for
+ * @sg_len bytes and a @dm_len-byte buffer, both using direction @dir.
+ * On failure everything set up so far is released and the error returned.
+ */
+static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
+			 struct scatterlist *sg, u64 sg_len,
+			 unsigned int dm_len,
+			 enum dma_data_direction dir)
+{
+	struct device *dev = cmd_q->ccp->dev;
+	int err;
+
+	memset(data, 0, sizeof(*data));
+
+	/* The dm workarea is only attempted once the sg workarea is ready */
+	err = ccp_init_sg_workarea(&data->sg_wa, dev, sg, sg_len, dir);
+	if (!err)
+		err = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
+
+	/* Partial setup is undone by the common teardown helper */
+	if (err)
+		ccp_free_data(data, cmd_q);
+
+	return err;
+}
+
+static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
+{
+	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
+	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
+	unsigned int buf_count, nbytes;
+	/* @from: 0 copies sg -> buffer, 1 copies buffer -> sg; returns bytes moved */
+	/* Clear the buffer if setting it */
+	if (!from)
+		memset(dm_wa->address, 0, dm_wa->length);
+
+	if (!sg_wa->sg)
+		return 0;
+
+	/* Perform the copy operation
+	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
+	 *   an unsigned int
+	 */
+	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
+	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
+				 nbytes, from);
+
+	/* Update the structures and generate the count */
+	buf_count = 0;
+	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
+		nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used,
+			     dm_wa->length - buf_count);	/* rest of entry vs. room left */
+		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
+
+		buf_count += nbytes;
+		ccp_update_sg_workarea(sg_wa, nbytes);
+	}
+
+	return buf_count;
+}
+
+static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
+{
+	return ccp_queue_buf(data, 0);	/* from == 0: copy from the sg into the buffer */
+}
+
+static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
+{
+	return ccp_queue_buf(data, 1);	/* from == 1: copy from the buffer out to the sg */
+}
+
+static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
+			     struct ccp_op *op, unsigned int block_size,
+			     bool blocksize_op)
+{
+	unsigned int sg_src_len, sg_dst_len, op_len;
+
+	/* The CCP can only DMA from/to one address each per operation. This
+	 * requires that we find the smallest DMA area between the source
+	 * and destination. The resulting len values will always be <= UINT_MAX
+	 * because the dma length is an unsigned int.
+	 */
+	sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used;
+	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
+
+	if (dst) {
+		sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used;
+		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);	/* capped by src's bytes_left */
+		op_len = min(sg_src_len, sg_dst_len);
+	} else {
+		op_len = sg_src_len;
+	}
+
+	/* The data operation length will be at least block_size in length
+	 * or the smaller of available sg room remaining for the source or
+	 * the destination
+	 */
+	op_len = max(op_len, block_size);
+
+	/* Unless we have to buffer data, there's no reason to wait */
+	op->soc = 0;
+
+	if (sg_src_len < block_size) {
+		/* Not enough data in the sg element, so it
+		 * needs to be buffered into a blocksize chunk
+		 */
+		int cp_len = ccp_fill_queue_buf(src);
+
+		op->soc = 1;
+		op->src.u.dma.address = src->dm_wa.dma.address;
+		op->src.u.dma.offset = 0;
+		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
+	} else {
+		/* Enough data in the sg element, but we need to
+		 * adjust for any previously copied data
+		 */
+		op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg);
+		op->src.u.dma.offset = src->sg_wa.sg_used;
+		op->src.u.dma.length = op_len & ~(block_size - 1);	/* assumes power-of-two block_size - TODO confirm */
+
+		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
+	}
+
+	if (dst) {
+		if (sg_dst_len < block_size) {
+			/* Not enough room in the sg element or we're on the
+			 * last piece of data (when using padding), so the
+			 * output needs to be buffered into a blocksize chunk
+			 */
+			op->soc = 1;
+			op->dst.u.dma.address = dst->dm_wa.dma.address;
+			op->dst.u.dma.offset = 0;
+			op->dst.u.dma.length = op->src.u.dma.length;
+		} else {
+			/* Enough room in the sg element, but we need to
+			 * adjust for any previously used area
+			 */
+			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg);
+			op->dst.u.dma.offset = dst->sg_wa.sg_used;
+			op->dst.u.dma.length = op->src.u.dma.length;	/* dst sg is advanced later, in ccp_process_data() */
+		}
+	}
+}
+
+/* Post-operation bookkeeping: clear op->init and, when there is a @dst, either
+ * drain its dm buffer into the sg or advance the sg by the op's dst length. */
+static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
+			     struct ccp_op *op)
+{
+	op->init = 0;
+	if (!dst)
+		return;
+	if (op->dst.u.dma.address != dst->dm_wa.dma.address)
+		ccp_update_sg_workarea(&dst->sg_wa, op->dst.u.dma.length);
+	else
+		ccp_empty_queue_buf(dst);
+}
+
+/* Passthru copy between @wa and SB entry @sb: @from true reads the SB into
+ * @wa (soc set), false writes @wa to the SB. Returns passthru()'s result.
+ */
+static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
+			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+			       u32 byte_swap, bool from)
+{
+	struct ccp_op op;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = jobid;
+	op.eom = 1;
+	op.u.passthru.byte_swap = byte_swap;
+
+	if (!from) {
+		op.src.type = CCP_MEMTYPE_SYSTEM;
+		op.src.u.dma.address = wa->dma.address;
+		op.src.u.dma.length = wa->length;
+		op.dst.type = CCP_MEMTYPE_SB;
+		op.dst.u.sb = sb;
+	} else {
+		op.soc = 1;
+		op.src.type = CCP_MEMTYPE_SB;
+		op.src.u.sb = sb;
+		op.dst.type = CCP_MEMTYPE_SYSTEM;
+		op.dst.u.dma.address = wa->dma.address;
+		op.dst.u.dma.length = wa->length;
+	}
+	return cmd_q->ccp->vdata->perform->passthru(&op);
+}
+
+static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
+			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+			  u32 byte_swap)
+{
+	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false); /* @wa -> SB */
+}
+
+static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
+			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
+			    u32 byte_swap)
+{
+	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true); /* SB -> @wa */
+}
+/* Run an AES-CMAC request on a CCP command queue.
+ *
+ * @cmd_q: queue whose key/context storage block (SB) entries (sb_key,
+ *         sb_ctx) and device callbacks are used
+ * @cmd:   request; all parameters are taken from cmd->u.aes
+ *
+ * The key and the IV are copied into DMA workareas and pushed to the SB with
+ * a 256-bit byte swap. The source scatterlist is then submitted to the AES
+ * engine with AES_BLOCK_SIZE as the block size and without a destination.
+ * Finally the context is read back from the SB and the bytes at offset
+ * CCP_SB_BYTES - AES_BLOCK_SIZE are copied to aes->iv. When aes->cmac_final
+ * is set, aes->cmac_key is written into the context just before the last
+ * piece of source data is submitted.
+ *
+ * Return: 0 on success; -EINVAL when the key length, source length, IV
+ * length or CMAC key length is not accepted or a required pointer is NULL;
+ * otherwise the error returned by a helper or by the device callback. When a
+ * passthru or AES operation fails, cmd->engine_error is set from
+ * cmd_q->cmd_error.
+ */
+static noinline_for_stack int
+ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_aes_engine *aes = &cmd->u.aes;
+	struct ccp_dm_workarea key, ctx;
+	struct ccp_data src;
+	struct ccp_op op;
+	unsigned int dm_offset;
+	int ret;
+
+	if (!((aes->key_len == AES_KEYSIZE_128) ||
+	      (aes->key_len == AES_KEYSIZE_192) ||
+	      (aes->key_len == AES_KEYSIZE_256)))
+		return -EINVAL;
+
+	if (aes->src_len & (AES_BLOCK_SIZE - 1))
+		return -EINVAL;
+
+	if (aes->iv_len != AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (!aes->key || !aes->iv || !aes->src)
+		return -EINVAL;
+
+	if (aes->cmac_final) {
+		if (aes->cmac_key_len != AES_BLOCK_SIZE)
+			return -EINVAL;
+
+		if (!aes->cmac_key)
+			return -EINVAL;
+	}
+
+	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
+	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
+
+	ret = -EIO;	/* NOTE(review): overwritten below before it is read */
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_key = cmd_q->sb_key;
+	op.sb_ctx = cmd_q->sb_ctx;
+	op.init = 1;
+	op.u.aes.type = aes->type;
+	op.u.aes.mode = aes->mode;
+	op.u.aes.action = aes->action;
+
+	/* All supported key sizes fit in a single (32-byte) SB entry
+	 * and must be in little endian format. Use the 256-bit byte
+	 * swap passthru option to convert from big endian to little
+	 * endian. The key is placed at the end of the workarea
+	 * (offset CCP_SB_BYTES - key_len).
+	 */
+	ret = ccp_init_dm_workarea(&key, cmd_q,
+				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	dm_offset = CCP_SB_BYTES - aes->key_len;
+	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	if (ret)
+		goto e_key;
+	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_key;
+	}
+
+	/* The AES context fits in a single (32-byte) SB entry and
+	 * must be in little endian format. Use the 256-bit byte swap
+	 * passthru option to convert from big endian to little endian.
+	 * The IV is placed at the end of the workarea
+	 * (offset CCP_SB_BYTES - AES_BLOCK_SIZE).
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q,
+				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		goto e_key;
+
+	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
+	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	if (ret)
+		goto e_ctx;
+	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_ctx;
+	}
+
+	/* Send data to the CCP AES engine. No destination is passed to
+	 * ccp_prepare_data()/ccp_process_data(); the result is taken from
+	 * the context after the loop.
+	 */
+	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
+			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
+	if (ret)
+		goto e_ctx;
+
+	while (src.sg_wa.bytes_left) {
+		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
+		if (aes->cmac_final && !src.sg_wa.bytes_left) {
+			op.eom = 1;
+
+			/* Push the K1/K2 key to the CCP now: read the
+			 * current context back from the SB, overwrite the
+			 * first cmac_key_len bytes of the workarea with
+			 * cmac_key, and write the workarea to the SB again
+			 * before the last piece of data is submitted.
+			 */
+			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
+					       op.sb_ctx,
+					       CCP_PASSTHRU_BYTESWAP_256BIT);
+			if (ret) {
+				cmd->engine_error = cmd_q->cmd_error;
+				goto e_src;
+			}
+
+			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
+					      aes->cmac_key_len);
+			if (ret)
+				goto e_src;
+			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+					     CCP_PASSTHRU_BYTESWAP_256BIT);
+			if (ret) {
+				cmd->engine_error = cmd_q->cmd_error;
+				goto e_src;
+			}
+		}
+
+		ret = cmd_q->ccp->vdata->perform->aes(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_src;
+		}
+
+		ccp_process_data(&src, NULL, &op);
+	}
+
+	/* Retrieve the AES context - convert from LE to BE using
+	 * 32-byte (256-bit) byteswapping
+	 */
+	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			       CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_src;
+	}
+
+	/* ...but we only need AES_BLOCK_SIZE bytes */
+	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
+	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+e_key:
+	ccp_dm_free(&key);
+
+	return ret;
+}
+
+/* Run an AES-GCM encrypt or decrypt request on a CCP command queue.
+ *
+ * @cmd_q: queue whose key/context storage block (SB) entries (sb_key,
+ *         sb_ctx) and device callbacks are used
+ * @cmd:   request; all parameters are taken from cmd->u.aes
+ *
+ * aes->src holds aes->aad_len bytes of AAD followed by the input. For
+ * encryption the input is aes->src_len bytes of plaintext and the tag is
+ * written to aes->dst right after the ciphertext. For decryption
+ * aes->src_len covers ciphertext plus tag and the tag is read from aes->src.
+ * aes->authsize selects the tag length (0 means AES_BLOCK_SIZE).
+ *
+ * The operation is carried out in four steps:
+ *   1. GHASH over the AAD (skipped when aad_len is 0),
+ *   2. GCTR over the input (skipped when the input is empty),
+ *   3. the original IV is written back into the context,
+ *   4. a final GHASH over the 16-byte block holding the AAD and input
+ *      lengths in bits, which produces the tag.
+ *
+ * Return: 0 on success; -EINVAL for a missing IV/key, an unsupported key or
+ * tag length, or a decrypt input shorter than the tag; -EBADMSG when the
+ * computed tag does not match the supplied one on decrypt; otherwise the
+ * error returned by a helper or by the device callback. When a passthru or
+ * AES operation fails, cmd->engine_error is set from cmd_q->cmd_error.
+ */
+static noinline_for_stack int
+ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_aes_engine *aes = &cmd->u.aes;
+	struct ccp_dm_workarea key, ctx, final_wa, tag;
+	struct ccp_data src, dst;
+	struct ccp_data aad;
+	struct ccp_op op;
+
+	unsigned int dm_offset;
+	unsigned int authsize;
+	unsigned int jobid;
+	unsigned int ilen;
+	bool in_place = true; /* Default value */
+	__be64 *final;
+	int ret;
+
+	struct scatterlist *p_inp, sg_inp[2];
+	struct scatterlist *p_tag, sg_tag[2];
+	struct scatterlist *p_outp, sg_outp[2];
+	struct scatterlist *p_aad;
+
+	if (!aes->iv)
+		return -EINVAL;
+
+	if (!((aes->key_len == AES_KEYSIZE_128) ||
+		(aes->key_len == AES_KEYSIZE_192) ||
+		(aes->key_len == AES_KEYSIZE_256)))
+		return -EINVAL;
+
+	if (!aes->key) /* Gotta have a key SGL */
+		return -EINVAL;
+
+	/* Zero defaults to 16 bytes, the maximum size */
+	authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE;
+	switch (authsize) {
+	case 16:
+	case 15:
+	case 14:
+	case 13:
+	case 12:
+	case 8:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* First, decompose the source buffer into AAD & PT,
+	 * and the destination buffer into AAD, CT & tag, or
+	 * the input into CT & tag.
+	 * It is expected that the input and output SGs will
+	 * be valid, even if the AAD and input lengths are 0.
+	 */
+	p_aad = aes->src;
+	p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
+	p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
+	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
+		ilen = aes->src_len;
+		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
+	} else {
+		/* Input length for decryption includes tag. Reject inputs
+		 * too short to hold it instead of letting ilen wrap around;
+		 * nothing has been allocated yet, so a plain return is fine.
+		 */
+		if (aes->src_len < authsize)
+			return -EINVAL;
+
+		ilen = aes->src_len - authsize;
+		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
+	}
+
+	jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = jobid;
+	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
+	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
+	op.init = 1;
+	op.u.aes.type = aes->type;
+
+	/* Copy the key to the LSB. The key occupies the end of a single
+	 * SB-sized workarea and is byte swapped on its way to the SB.
+	 */
+	ret = ccp_init_dm_workarea(&key, cmd_q,
+				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	dm_offset = CCP_SB_BYTES - aes->key_len;
+	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	if (ret)
+		goto e_key;
+	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_key;
+	}
+
+	/* Copy the context (IV) to the LSB.
+	 * There is an assumption here that the IV is 96 bits in length, plus
+	 * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q,
+				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		goto e_key;
+
+	/* dm_offset is reused in step 3 to restore the IV at the same place */
+	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
+	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	if (ret)
+		goto e_ctx;
+
+	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_ctx;
+	}
+
+	op.init = 1;
+	if (aes->aad_len > 0) {
+		/* Step 1: Run a GHASH over the Additional Authenticated Data */
+		ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len,
+				    AES_BLOCK_SIZE,
+				    DMA_TO_DEVICE);
+		if (ret)
+			goto e_ctx;
+
+		op.u.aes.mode = CCP_AES_MODE_GHASH;
+		op.u.aes.action = CCP_AES_GHASHAAD;
+
+		while (aad.sg_wa.bytes_left) {
+			ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true);
+
+			ret = cmd_q->ccp->vdata->perform->aes(&op);
+			if (ret) {
+				cmd->engine_error = cmd_q->cmd_error;
+				goto e_aad;
+			}
+
+			ccp_process_data(&aad, NULL, &op);
+			op.init = 0;
+		}
+	}
+
+	op.u.aes.mode = CCP_AES_MODE_GCTR;
+	op.u.aes.action = aes->action;
+
+	if (ilen > 0) {
+		/* Step 2: Run a GCTR over the plaintext */
+		in_place = (sg_virt(p_inp) == sg_virt(p_outp));
+
+		ret = ccp_init_data(&src, cmd_q, p_inp, ilen,
+				    AES_BLOCK_SIZE,
+				    in_place ? DMA_BIDIRECTIONAL
+					     : DMA_TO_DEVICE);
+		if (ret)
+			goto e_aad;
+
+		if (in_place) {
+			dst = src;
+		} else {
+			ret = ccp_init_data(&dst, cmd_q, p_outp, ilen,
+					    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
+			if (ret)
+				goto e_src;
+		}
+
+		op.soc = 0;
+		op.eom = 0;
+		op.init = 1;
+		while (src.sg_wa.bytes_left) {
+			ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
+			if (!src.sg_wa.bytes_left) {
+				unsigned int nbytes = ilen % AES_BLOCK_SIZE;
+
+				/* A trailing partial block is flagged as the
+				 * end of the message and its size is passed
+				 * on as (number of bits - 1).
+				 */
+				if (nbytes) {
+					op.eom = 1;
+					op.u.aes.size = (nbytes * 8) - 1;
+				}
+			}
+
+			ret = cmd_q->ccp->vdata->perform->aes(&op);
+			if (ret) {
+				cmd->engine_error = cmd_q->cmd_error;
+				goto e_dst;
+			}
+
+			ccp_process_data(&src, &dst, &op);
+			op.init = 0;
+		}
+	}
+
+	/* Step 3: Update the IV portion of the context with the original IV */
+	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			       CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	if (ret)
+		goto e_dst;
+
+	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	/* Step 4: Concatenate the lengths of the AAD and source, and
+	 * hash that 16 byte buffer. Both lengths are expressed in bits as
+	 * big endian 64-bit values; widen before multiplying so that large
+	 * byte counts do not wrap in 32-bit arithmetic.
+	 */
+	ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		goto e_dst;
+	final = (__be64 *)final_wa.address;
+	final[0] = cpu_to_be64((u64)aes->aad_len * 8);
+	final[1] = cpu_to_be64((u64)ilen * 8);
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = jobid;
+	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
+	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
+	op.init = 1;
+	op.u.aes.type = aes->type;
+	op.u.aes.mode = CCP_AES_MODE_GHASH;
+	op.u.aes.action = CCP_AES_GHASHFINAL;
+	op.src.type = CCP_MEMTYPE_SYSTEM;
+	op.src.u.dma.address = final_wa.dma.address;
+	op.src.u.dma.length = AES_BLOCK_SIZE;
+	op.dst.type = CCP_MEMTYPE_SYSTEM;
+	op.dst.u.dma.address = final_wa.dma.address;
+	op.dst.u.dma.length = AES_BLOCK_SIZE;
+	op.eom = 1;
+	op.u.aes.size = 0;
+	ret = cmd_q->ccp->vdata->perform->aes(&op);
+	if (ret) {
+		/* Report the engine status like every other engine call */
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_final_wa;
+	}
+
+	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
+		/* Put the ciphered tag after the ciphertext. */
+		ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize);
+	} else {
+		/* Does this ciphered tag match the input? */
+		ret = ccp_init_dm_workarea(&tag, cmd_q, authsize,
+					   DMA_BIDIRECTIONAL);
+		if (ret)
+			goto e_final_wa;
+		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize);
+		if (ret) {
+			ccp_dm_free(&tag);
+			goto e_final_wa;
+		}
+
+		ret = crypto_memneq(tag.address, final_wa.address,
+				    authsize) ? -EBADMSG : 0;
+		ccp_dm_free(&tag);
+	}
+
+	/* Cleanup mirrors the setup order; src/dst/aad only exist when the
+	 * corresponding step actually ran, hence the guards below.
+	 */
+e_final_wa:
+	ccp_dm_free(&final_wa);
+
+e_dst:
+	if (ilen > 0 && !in_place)
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	if (ilen > 0)
+		ccp_free_data(&src, cmd_q);
+
+e_aad:
+	if (aes->aad_len)
+		ccp_free_data(&aad, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+e_key:
+	ccp_dm_free(&key);
+
+	return ret;
+}
+/* Run an AES request (mode given by aes->mode) on a CCP command queue.
+ *
+ * @cmd_q: queue whose key/context storage block (SB) entries (sb_key,
+ *         sb_ctx) and device callbacks are used
+ * @cmd:   request; all parameters are taken from cmd->u.aes
+ *
+ * The key is pushed to the SB, and for every mode except ECB the IV is
+ * pushed as the context. aes->src is then processed into aes->dst with
+ * AES_BLOCK_SIZE as the block size; the operation is treated as in-place
+ * when both scatterlists start at the same virtual address. For every mode
+ * except ECB the context is read back afterwards and copied to aes->iv.
+ *
+ * Return: 0 on success; -EINVAL when the key length is not 128/192/256 bits,
+ * when the source length is not a multiple of AES_BLOCK_SIZE for ECB, CBC or
+ * CFB, when key/src/dst is NULL, or when a non-ECB mode lacks an
+ * AES_BLOCK_SIZE IV; otherwise the error returned by a helper or by the
+ * device callback. When a passthru or AES operation fails,
+ * cmd->engine_error is set from cmd_q->cmd_error.
+ */
+static noinline_for_stack int
+ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_aes_engine *aes = &cmd->u.aes;
+	struct ccp_dm_workarea key, ctx;
+	struct ccp_data src, dst;
+	struct ccp_op op;
+	unsigned int dm_offset;
+	bool in_place = false;
+	int ret;
+
+	if (!((aes->key_len == AES_KEYSIZE_128) ||
+	      (aes->key_len == AES_KEYSIZE_192) ||
+	      (aes->key_len == AES_KEYSIZE_256)))
+		return -EINVAL;
+
+	if (((aes->mode == CCP_AES_MODE_ECB) ||
+	     (aes->mode == CCP_AES_MODE_CBC) ||
+	     (aes->mode == CCP_AES_MODE_CFB)) &&
+	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
+		return -EINVAL;
+
+	if (!aes->key || !aes->src || !aes->dst)
+		return -EINVAL;
+
+	if (aes->mode != CCP_AES_MODE_ECB) {
+		if (aes->iv_len != AES_BLOCK_SIZE)
+			return -EINVAL;
+
+		if (!aes->iv)
+			return -EINVAL;
+	}
+
+	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
+	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
+
+	ret = -EIO;	/* NOTE(review): overwritten below before it is read */
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_key = cmd_q->sb_key;
+	op.sb_ctx = cmd_q->sb_ctx;
+	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
+	op.u.aes.type = aes->type;
+	op.u.aes.mode = aes->mode;
+	op.u.aes.action = aes->action;
+
+	/* All supported key sizes fit in a single (32-byte) SB entry
+	 * and must be in little endian format. Use the 256-bit byte
+	 * swap passthru option to convert from big endian to little
+	 * endian. The key is placed at the end of the workarea
+	 * (offset CCP_SB_BYTES - key_len).
+	 */
+	ret = ccp_init_dm_workarea(&key, cmd_q,
+				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	dm_offset = CCP_SB_BYTES - aes->key_len;
+	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
+	if (ret)
+		goto e_key;
+	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_key;
+	}
+
+	/* The AES context fits in a single (32-byte) SB entry and
+	 * must be in little endian format. Use the 256-bit byte swap
+	 * passthru option to convert from big endian to little endian.
+	 * The workarea is allocated for every mode (so the cleanup path
+	 * can free it unconditionally) but only loaded when an IV is used.
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q,
+				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		goto e_key;
+
+	if (aes->mode != CCP_AES_MODE_ECB) {
+		/* Load the AES context - convert to LE */
+		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
+		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+		if (ret)
+			goto e_ctx;
+		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+				     CCP_PASSTHRU_BYTESWAP_256BIT);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_ctx;
+		}
+	}
+	switch (aes->mode) {
+	case CCP_AES_MODE_CFB: /* CFB128 only */
+	case CCP_AES_MODE_CTR:
+		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
+		break;
+	default:
+		op.u.aes.size = 0;
+	}
+
+	/* Prepare the input and output data workareas. For in-place
+	 * operations we need to set the dma direction to BIDIRECTIONAL
+	 * and copy the src workarea to the dst workarea. In-place is
+	 * detected by comparing the virtual address of the first entry
+	 * of each scatterlist.
+	 */
+	if (sg_virt(aes->src) == sg_virt(aes->dst))
+		in_place = true;
+
+	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
+			    AES_BLOCK_SIZE,
+			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (ret)
+		goto e_ctx;
+
+	if (in_place) {
+		dst = src;
+	} else {
+		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
+				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Send data to the CCP AES engine */
+	while (src.sg_wa.bytes_left) {
+		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
+		if (!src.sg_wa.bytes_left) {
+			op.eom = 1;
+
+			/* Since we don't retrieve the AES context in ECB
+			 * mode we have to wait for the operation to complete
+			 * on the last piece of data
+			 */
+			if (aes->mode == CCP_AES_MODE_ECB)
+				op.soc = 1;
+		}
+
+		ret = cmd_q->ccp->vdata->perform->aes(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		ccp_process_data(&src, &dst, &op);
+	}
+
+	if (aes->mode != CCP_AES_MODE_ECB) {
+		/* Retrieve the AES context - convert from LE to BE using
+		 * 32-byte (256-bit) byteswapping
+		 */
+		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+				       CCP_PASSTHRU_BYTESWAP_256BIT);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		/* ...but we only need AES_BLOCK_SIZE bytes */
+		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
+		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
+	}
+
+e_dst:
+	if (!in_place)
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+e_key:
+	ccp_dm_free(&key);
+
+	return ret;
+}
+/* Run an XTS-AES request on a CCP command queue.
+ *
+ * @cmd_q: queue whose key/context storage block (SB) entries (sb_key,
+ *         sb_ctx) and device callbacks are used
+ * @cmd:   request; all parameters are taken from cmd->u.xts
+ *
+ * xts->unit_size is translated to a byte count (16, 512, 1024, 2048 or 4096)
+ * that is used as the block size for the data workareas. xts->key holds two
+ * parts of xts->key_len bytes each; their placement in the key workarea
+ * depends on the device version. The IV is loaded as the context without
+ * byte swapping, xts->src is processed into xts->dst (in-place when both
+ * scatterlists start at the same virtual address), and the context is then
+ * read back and copied to xts->iv.
+ *
+ * Return: 0 on success; -EINVAL for an unknown unit size, a key length other
+ * than 128 or 256 bits, a source length that is not a multiple of
+ * AES_BLOCK_SIZE when xts->final is not set, an IV length other than
+ * AES_BLOCK_SIZE, or a NULL key/iv/src/dst; otherwise the error returned by
+ * a helper or by the device callback. When a passthru or XTS operation
+ * fails, cmd->engine_error is set from cmd_q->cmd_error.
+ */
+static noinline_for_stack int
+ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
+	struct ccp_dm_workarea key, ctx;
+	struct ccp_data src, dst;
+	struct ccp_op op;
+	unsigned int unit_size, dm_offset;
+	bool in_place = false;
+	unsigned int sb_count;
+	enum ccp_aes_type aestype;
+	int ret;
+
+	switch (xts->unit_size) {
+	case CCP_XTS_AES_UNIT_SIZE_16:
+		unit_size = 16;
+		break;
+	case CCP_XTS_AES_UNIT_SIZE_512:
+		unit_size = 512;
+		break;
+	case CCP_XTS_AES_UNIT_SIZE_1024:
+		unit_size = 1024;
+		break;
+	case CCP_XTS_AES_UNIT_SIZE_2048:
+		unit_size = 2048;
+		break;
+	case CCP_XTS_AES_UNIT_SIZE_4096:
+		unit_size = 4096;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (xts->key_len == AES_KEYSIZE_128)
+		aestype = CCP_AES_TYPE_128;
+	else if (xts->key_len == AES_KEYSIZE_256)
+		aestype = CCP_AES_TYPE_256;
+	else
+		return -EINVAL;
+
+	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
+		return -EINVAL;
+
+	if (xts->iv_len != AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
+		return -EINVAL;
+
+	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
+	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
+
+	ret = -EIO;	/* NOTE(review): overwritten below before it is read */
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_key = cmd_q->sb_key;
+	op.sb_ctx = cmd_q->sb_ctx;
+	op.init = 1;
+	op.u.xts.type = aestype;
+	op.u.xts.action = xts->action;
+	op.u.xts.unit_size = xts->unit_size;
+
+	/* A version 3 device only supports 128-bit keys, which fits into a
+	 * single SB entry. A version 5 device uses a 512-bit vector, so two
+	 * SB entries.
+	 */
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
+		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
+	else
+		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
+	ret = ccp_init_dm_workarea(&key, cmd_q,
+				   sb_count * CCP_SB_BYTES,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
+		/* All supported key sizes must be in little endian format.
+		 * Use the 256-bit byte swap passthru option to convert from
+		 * big endian to little endian. The first key_len bytes of
+		 * xts->key go to offset CCP_SB_BYTES - AES_KEYSIZE_128 and
+		 * the second key_len bytes go to offset 0.
+		 */
+		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
+		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
+		if (ret)
+			goto e_key;
+		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
+		if (ret)
+			goto e_key;
+	} else {
+		/* Version 5 CCPs use a 512-bit space for the key: each portion
+		 * occupies 256 bits, or one entire slot, and is zero-padded.
+		 * The first key_len bytes of xts->key end at the end of the
+		 * first slot and the second key_len bytes end at the end of
+		 * the second slot; pad is the unused space at the start of
+		 * each slot.
+		 */
+		unsigned int pad;
+
+		dm_offset = CCP_SB_BYTES;
+		pad = dm_offset - xts->key_len;
+		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
+		if (ret)
+			goto e_key;
+		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
+				      xts->key_len, xts->key_len);
+		if (ret)
+			goto e_key;
+	}
+	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_key;
+	}
+
+	/* The AES context fits in a single (32-byte) SB entry and
+	 * for XTS is already in little endian format so no byte swapping
+	 * is needed.
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q,
+				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		goto e_key;
+
+	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
+	if (ret)
+		goto e_ctx;
+	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			     CCP_PASSTHRU_BYTESWAP_NOOP);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_ctx;
+	}
+
+	/* Prepare the input and output data workareas. For in-place
+	 * operations we need to set the dma direction to BIDIRECTIONAL
+	 * and copy the src workarea to the dst workarea. In-place is
+	 * detected by comparing the virtual address of the first entry
+	 * of each scatterlist.
+	 */
+	if (sg_virt(xts->src) == sg_virt(xts->dst))
+		in_place = true;
+
+	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
+			    unit_size,
+			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (ret)
+		goto e_ctx;
+
+	if (in_place) {
+		dst = src;
+	} else {
+		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
+				    unit_size, DMA_FROM_DEVICE);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Send data to the CCP AES engine */
+	while (src.sg_wa.bytes_left) {
+		ccp_prepare_data(&src, &dst, &op, unit_size, true);
+		if (!src.sg_wa.bytes_left)
+			op.eom = 1;
+
+		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		ccp_process_data(&src, &dst, &op);
+	}
+
+	/* Retrieve the AES context - convert from LE to BE using
+	 * 32-byte (256-bit) byteswapping
+	 *
+	 * NOTE(review): the context was loaded above at offset 0 with
+	 * CCP_PASSTHRU_BYTESWAP_NOOP, whereas it is read back here with a
+	 * 256-bit byte swap and taken from the end of the workarea -
+	 * confirm that this asymmetry is intended.
+	 */
+	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			       CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	/* ...but we only need AES_BLOCK_SIZE bytes */
+	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
+	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
+
+e_dst:
+	if (!in_place)
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+e_key:
+	ccp_dm_free(&key);
+
+	return ret;
+}
+/* Run a 3DES request on a CCP command queue.
+ *
+ * @cmd_q: queue whose key/context storage block (SB) entries (sb_key,
+ *         sb_ctx) and device callbacks are used
+ * @cmd:   request; all parameters are taken from cmd->u.des3
+ *
+ * Only devices of version 5.0 or later that provide a des3 callback are
+ * accepted. The three parts of the key are copied into the key workarea in
+ * reverse order and pushed to the SB. For every mode except ECB the IV is
+ * pushed as the context before the data is processed, and the context is
+ * read back and copied to des3->iv afterwards. des3->src is processed into
+ * des3->dst with DES3_EDE_BLOCK_SIZE as the block size (in-place when both
+ * scatterlists start at the same virtual address).
+ *
+ * Return: 0 on success; -EINVAL for an unsupported device, a key length
+ * other than DES3_EDE_KEY_SIZE, a source length that is not a multiple of
+ * DES3_EDE_BLOCK_SIZE in ECB or CBC mode, a NULL key/src/dst, or a non-ECB
+ * mode without a DES3_EDE_BLOCK_SIZE IV; otherwise the error returned by a
+ * helper or by the device callback. When a passthru or DES3 operation
+ * fails, cmd->engine_error is set from cmd_q->cmd_error.
+ */
+static noinline_for_stack int
+ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_des3_engine *des3 = &cmd->u.des3;
+
+	struct ccp_dm_workarea key, ctx;
+	struct ccp_data src, dst;
+	struct ccp_op op;
+	unsigned int dm_offset;
+	unsigned int len_singlekey;
+	bool in_place = false;
+	int ret;
+
+	/* Error checks */
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
+		return -EINVAL;
+
+	if (!cmd_q->ccp->vdata->perform->des3)
+		return -EINVAL;
+
+	if (des3->key_len != DES3_EDE_KEY_SIZE)
+		return -EINVAL;
+
+	if (((des3->mode == CCP_DES3_MODE_ECB) ||
+		(des3->mode == CCP_DES3_MODE_CBC)) &&
+		(des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
+		return -EINVAL;
+
+	if (!des3->key || !des3->src || !des3->dst)
+		return -EINVAL;
+
+	if (des3->mode != CCP_DES3_MODE_ECB) {
+		if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
+			return -EINVAL;
+
+		if (!des3->iv)
+			return -EINVAL;
+	}
+
+	ret = -EIO;	/* NOTE(review): overwritten below before it is read */
+	/* Zero out all the fields of the command desc */
+	memset(&op, 0, sizeof(op));
+
+	/* Set up the Function field */
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_key = cmd_q->sb_key;
+
+	op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
+	op.u.des3.type = des3->type;
+	op.u.des3.mode = des3->mode;
+	op.u.des3.action = des3->action;
+
+	/*
+	 * All supported key sizes fit in a single (32-byte) KSB entry and
+	 * (like AES) must be in little endian format. Use the 256-bit byte
+	 * swap passthru option to convert from big endian to little endian.
+	 */
+	ret = ccp_init_dm_workarea(&key, cmd_q,
+				   CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	/*
+	 * The contents of the key triplet are in the reverse order of what
+	 * is required by the engine. Copy the 3 pieces individually to put
+	 * them where they belong: the first third of des3->key lands last
+	 * in the workarea and the last third lands first, all within the
+	 * final key_len bytes of the SB-sized buffer.
+	 */
+	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
+
+	len_singlekey = des3->key_len / 3;
+	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
+			      des3->key, 0, len_singlekey);
+	if (ret)
+		goto e_key;
+	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
+			      des3->key, len_singlekey, len_singlekey);
+	if (ret)
+		goto e_key;
+	ret = ccp_set_dm_area(&key, dm_offset,
+			      des3->key, 2 * len_singlekey, len_singlekey);
+	if (ret)
+		goto e_key;
+
+	/* Copy the key to the SB */
+	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_key;
+	}
+
+	/*
+	 * The DES3 context fits in a single (32-byte) KSB entry and
+	 * must be in little endian format. Use the 256-bit byte swap
+	 * passthru option to convert from big endian to little endian.
+	 * The context workarea only exists for non-ECB modes; the cleanup
+	 * path applies the same mode test before freeing it.
+	 */
+	if (des3->mode != CCP_DES3_MODE_ECB) {
+		op.sb_ctx = cmd_q->sb_ctx;
+
+		ret = ccp_init_dm_workarea(&ctx, cmd_q,
+					   CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
+					   DMA_BIDIRECTIONAL);
+		if (ret)
+			goto e_key;
+
+		/* Load the context into the LSB; dm_offset is reused when
+		 * the context is read back after the data loop
+		 */
+		dm_offset = CCP_SB_BYTES - des3->iv_len;
+		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
+				      des3->iv_len);
+		if (ret)
+			goto e_ctx;
+
+		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+				     CCP_PASSTHRU_BYTESWAP_256BIT);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_ctx;
+		}
+	}
+
+	/*
+	 * Prepare the input and output data workareas. For in-place
+	 * operations we need to set the dma direction to BIDIRECTIONAL
+	 * and copy the src workarea to the dst workarea.
+	 */
+	if (sg_virt(des3->src) == sg_virt(des3->dst))
+		in_place = true;
+
+	ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
+			DES3_EDE_BLOCK_SIZE,
+			in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (ret)
+		goto e_ctx;
+
+	if (in_place)
+		dst = src;
+	else {
+		ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
+				DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Send data to the CCP DES3 engine */
+	while (src.sg_wa.bytes_left) {
+		ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
+		if (!src.sg_wa.bytes_left) {
+			op.eom = 1;
+
+			/* Since we don't retrieve the context in ECB mode
+			 * we have to wait for the operation to complete
+			 * on the last piece of data
+			 *
+			 * NOTE(review): the statement below clears op.soc
+			 * (for every mode) instead of setting it for ECB,
+			 * which does not match the comment above - confirm
+			 * the intended behaviour.
+			 */
+			op.soc = 0;
+		}
+
+		ret = cmd_q->ccp->vdata->perform->des3(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		ccp_process_data(&src, &dst, &op);
+	}
+
+	if (des3->mode != CCP_DES3_MODE_ECB) {
+		/* Retrieve the context and make BE */
+		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+				       CCP_PASSTHRU_BYTESWAP_256BIT);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
+		ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
+				DES3_EDE_BLOCK_SIZE);
+	}
+e_dst:
+	if (!in_place)
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	if (des3->mode != CCP_DES3_MODE_ECB)
+		ccp_dm_free(&ctx);
+
+e_key:
+	ccp_dm_free(&key);
+
+	return ret;
+}
+
+static noinline_for_stack int
+ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_sha_engine *sha = &cmd->u.sha;
+	struct ccp_dm_workarea ctx;
+	struct ccp_data src;
+	struct ccp_op op;
+	unsigned int ioffset, ooffset;
+	unsigned int digest_size;
+	int sb_count;
+	const void *init;
+	u64 block_size;
+	int ctx_size;
+	int ret;
+
+	switch (sha->type) {
+	case CCP_SHA_TYPE_1:
+		if (sha->ctx_len < SHA1_DIGEST_SIZE)
+			return -EINVAL;
+		block_size = SHA1_BLOCK_SIZE;
+		break;
+	case CCP_SHA_TYPE_224:
+		if (sha->ctx_len < SHA224_DIGEST_SIZE)
+			return -EINVAL;
+		block_size = SHA224_BLOCK_SIZE;
+		break;
+	case CCP_SHA_TYPE_256:
+		if (sha->ctx_len < SHA256_DIGEST_SIZE)
+			return -EINVAL;
+		block_size = SHA256_BLOCK_SIZE;
+		break;
+	case CCP_SHA_TYPE_384:
+		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
+		    || sha->ctx_len < SHA384_DIGEST_SIZE)
+			return -EINVAL;
+		block_size = SHA384_BLOCK_SIZE;
+		break;
+	case CCP_SHA_TYPE_512:
+		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
+		    || sha->ctx_len < SHA512_DIGEST_SIZE)
+			return -EINVAL;
+		block_size = SHA512_BLOCK_SIZE;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!sha->ctx)
+		return -EINVAL;
+
+	if (!sha->final && (sha->src_len & (block_size - 1)))
+		return -EINVAL;
+
+	/* The version 3 device can't handle zero-length input */
+	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
+
+		if (!sha->src_len) {
+			unsigned int digest_len;
+			const u8 *sha_zero;
+
+			/* Not final, just return */
+			if (!sha->final)
+				return 0;
+
+			/* CCP can't do a zero length sha operation so the
+			 * caller must buffer the data.
+			 */
+			if (sha->msg_bits)
+				return -EINVAL;
+
+			/* The CCP cannot perform zero-length sha operations
+			 * so the caller is required to buffer data for the
+			 * final operation. However, a sha operation for a
+			 * message with a total length of zero is valid so
+			 * known values are required to supply the result.
+			 */
+			switch (sha->type) {
+			case CCP_SHA_TYPE_1:
+				sha_zero = sha1_zero_message_hash;
+				digest_len = SHA1_DIGEST_SIZE;
+				break;
+			case CCP_SHA_TYPE_224:
+				sha_zero = sha224_zero_message_hash;
+				digest_len = SHA224_DIGEST_SIZE;
+				break;
+			case CCP_SHA_TYPE_256:
+				sha_zero = sha256_zero_message_hash;
+				digest_len = SHA256_DIGEST_SIZE;
+				break;
+			default:
+				return -EINVAL;
+			}
+
+			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
+						 digest_len, 1);
+
+			return 0;
+		}
+	}
+
+	/* Set variables used throughout */
+	switch (sha->type) {
+	case CCP_SHA_TYPE_1:
+		digest_size = SHA1_DIGEST_SIZE;
+		init = (void *) ccp_sha1_init;
+		ctx_size = SHA1_DIGEST_SIZE;
+		sb_count = 1;
+		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
+			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
+		else
+			ooffset = ioffset = 0;
+		break;
+	case CCP_SHA_TYPE_224:
+		digest_size = SHA224_DIGEST_SIZE;
+		init = (void *) ccp_sha224_init;
+		ctx_size = SHA256_DIGEST_SIZE;
+		sb_count = 1;
+		ioffset = 0;
+		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
+			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
+		else
+			ooffset = 0;
+		break;
+	case CCP_SHA_TYPE_256:
+		digest_size = SHA256_DIGEST_SIZE;
+		init = (void *) ccp_sha256_init;
+		ctx_size = SHA256_DIGEST_SIZE;
+		sb_count = 1;
+		ooffset = ioffset = 0;
+		break;
+	case CCP_SHA_TYPE_384:
+		digest_size = SHA384_DIGEST_SIZE;
+		init = (void *) ccp_sha384_init;
+		ctx_size = SHA512_DIGEST_SIZE;
+		sb_count = 2;
+		ioffset = 0;
+		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
+		break;
+	case CCP_SHA_TYPE_512:
+		digest_size = SHA512_DIGEST_SIZE;
+		init = (void *) ccp_sha512_init;
+		ctx_size = SHA512_DIGEST_SIZE;
+		sb_count = 2;
+		ooffset = ioffset = 0;
+		break;
+	default:
+		ret = -EINVAL;
+		goto e_data;
+	}
+
+	/* For zero-length plaintext the src pointer is ignored;
+	 * otherwise both parts must be valid
+	 */
+	if (sha->src_len && !sha->src)
+		return -EINVAL;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
+	op.u.sha.type = sha->type;
+	op.u.sha.msg_bits = sha->msg_bits;
+
+	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
+	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
+	 * first slot, and the left half in the second. Each portion must then
+	 * be in little endian format: use the 256-bit byte swap option.
+	 */
+	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
+				   DMA_BIDIRECTIONAL);
+	if (ret)
+		return ret;
+	if (sha->first) {
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+		case CCP_SHA_TYPE_224:
+		case CCP_SHA_TYPE_256:
+			memcpy(ctx.address + ioffset, init, ctx_size);
+			break;
+		case CCP_SHA_TYPE_384:
+		case CCP_SHA_TYPE_512:
+			memcpy(ctx.address + ctx_size / 2, init,
+			       ctx_size / 2);
+			memcpy(ctx.address, init + ctx_size / 2,
+			       ctx_size / 2);
+			break;
+		default:
+			ret = -EINVAL;
+			goto e_ctx;
+		}
+	} else {
+		/* Restore the context */
+		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
+				      sb_count * CCP_SB_BYTES);
+		if (ret)
+			goto e_ctx;
+	}
+
+	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			     CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_ctx;
+	}
+
+	if (sha->src) {
+		/* Send data to the CCP SHA engine; block_size is set above */
+		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
+				    block_size, DMA_TO_DEVICE);
+		if (ret)
+			goto e_ctx;
+
+		while (src.sg_wa.bytes_left) {
+			ccp_prepare_data(&src, NULL, &op, block_size, false);
+			if (sha->final && !src.sg_wa.bytes_left)
+				op.eom = 1;
+
+			ret = cmd_q->ccp->vdata->perform->sha(&op);
+			if (ret) {
+				cmd->engine_error = cmd_q->cmd_error;
+				goto e_data;
+			}
+
+			ccp_process_data(&src, NULL, &op);
+		}
+	} else {
+		op.eom = 1;
+		ret = cmd_q->ccp->vdata->perform->sha(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_data;
+		}
+	}
+
+	/* Retrieve the SHA context - convert from LE to BE using
+	 * 32-byte (256-bit) byteswapping to BE
+	 */
+	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
+			       CCP_PASSTHRU_BYTESWAP_256BIT);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_data;
+	}
+
+	if (sha->final) {
+		/* Finishing up, so get the digest */
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+		case CCP_SHA_TYPE_224:
+		case CCP_SHA_TYPE_256:
+			ccp_get_dm_area(&ctx, ooffset,
+					sha->ctx, 0,
+					digest_size);
+			break;
+		case CCP_SHA_TYPE_384:
+		case CCP_SHA_TYPE_512:
+			ccp_get_dm_area(&ctx, 0,
+					sha->ctx, LSB_ITEM_SIZE - ooffset,
+					LSB_ITEM_SIZE);
+			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
+					sha->ctx, 0,
+					LSB_ITEM_SIZE - ooffset);
+			break;
+		default:
+			ret = -EINVAL;
+			goto e_data;
+		}
+	} else {
+		/* Stash the context */
+		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
+				sb_count * CCP_SB_BYTES);
+	}
+
+	if (sha->final && sha->opad) {
+		/* HMAC operation, recursively perform final SHA */
+		struct ccp_cmd hmac_cmd;
+		struct scatterlist sg;
+		u8 *hmac_buf;
+
+		if (sha->opad_len != block_size) {
+			ret = -EINVAL;
+			goto e_data;
+		}
+
+		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
+		if (!hmac_buf) {
+			ret = -ENOMEM;
+			goto e_data;
+		}
+		sg_init_one(&sg, hmac_buf, block_size + digest_size);
+
+		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
+		switch (sha->type) {
+		case CCP_SHA_TYPE_1:
+		case CCP_SHA_TYPE_224:
+		case CCP_SHA_TYPE_256:
+			memcpy(hmac_buf + block_size,
+			       ctx.address + ooffset,
+			       digest_size);
+			break;
+		case CCP_SHA_TYPE_384:
+		case CCP_SHA_TYPE_512:
+			memcpy(hmac_buf + block_size,
+			       ctx.address + LSB_ITEM_SIZE + ooffset,
+			       LSB_ITEM_SIZE);
+			memcpy(hmac_buf + block_size +
+			       (LSB_ITEM_SIZE - ooffset),
+			       ctx.address,
+			       LSB_ITEM_SIZE);
+			break;
+		default:
+			kfree(hmac_buf);
+			ret = -EINVAL;
+			goto e_data;
+		}
+
+		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
+		hmac_cmd.engine = CCP_ENGINE_SHA;
+		hmac_cmd.u.sha.type = sha->type;
+		hmac_cmd.u.sha.ctx = sha->ctx;
+		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
+		hmac_cmd.u.sha.src = &sg;
+		hmac_cmd.u.sha.src_len = block_size + digest_size;
+		hmac_cmd.u.sha.opad = NULL;
+		hmac_cmd.u.sha.opad_len = 0;
+		hmac_cmd.u.sha.first = 1;
+		hmac_cmd.u.sha.final = 1;
+		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
+
+		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
+		if (ret)
+			cmd->engine_error = hmac_cmd.engine_error;
+
+		kfree(hmac_buf);
+	}
+
+e_data:
+	if (sha->src)
+		ccp_free_data(&src, cmd_q);
+
+e_ctx:
+	ccp_dm_free(&ctx);
+
+	return ret;
+}
+
+static noinline_for_stack int
+ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{	/* Run the RSA request in cmd->u.rsa on cmd_q; returns 0 on success. */
+	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
+	struct ccp_dm_workarea exp, src, dst;
+	struct ccp_op op;
+	unsigned int sb_count, i_len, o_len;
+	int ret;
+
+	/* Check against the maximum allowable size, in bits */
+	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
+		return -EINVAL;
+
+	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)	/* all required */
+		return -EINVAL;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
+	/* The RSA modulus must precede the message being acted upon, so
+	 * it must be copied to a DMA area where the message and the
+	 * modulus can be concatenated.  Therefore the input buffer
+	 * length required is twice the output buffer length (which
+	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
+	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
+	 * required.
+	 */
+	o_len = 32 * ((rsa->key_size + 255) / 256);	/* bits -> 32-byte units */
+	i_len = o_len * 2;
+
+	sb_count = 0;	/* stays 0 on version >= 5.0, so e_sb frees nothing */
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
+		/* sb_count is the number of storage block slots required
+		 * for the modulus.
+		 */
+		sb_count = o_len / CCP_SB_BYTES;
+		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
+								sb_count);
+		if (!op.sb_key)
+			return -EIO;	/* sballoc() returned 0 */
+	} else {
+		/* A version 5 device allows a modulus size that will not fit
+		 * in the LSB, so the command will transfer it from memory.
+		 * Set the sb key to the default, even though it's not used.
+		 */
+		op.sb_key = cmd_q->sb_key;
+	}
+
+	/* The RSA exponent must be in little endian format. Reverse its
+	 * byte order.
+	 */
+	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
+	if (ret)
+		goto e_sb;
+
+	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
+	if (ret)
+		goto e_exp;
+
+	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
+		/* Copy the exponent to the local storage block, using
+		 * as many 32-byte blocks as were allocated above. It's
+		 * already little endian, so no further change is required.
+		 */
+		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
+				     CCP_PASSTHRU_BYTESWAP_NOOP);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_exp;
+		}
+	} else {
+		/* The exponent can be retrieved from memory via DMA. */
+		op.exp.u.dma.address = exp.dma.address;
+		op.exp.u.dma.offset = 0;
+	}
+
+	/* Concatenate the modulus and the message. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
+	if (ret)
+		goto e_exp;
+
+	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
+	if (ret)
+		goto e_src;
+	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);	/* message goes in the second half */
+	if (ret)
+		goto e_src;
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = i_len;
+	op.dst.u.dma.address = dst.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = o_len;
+
+	op.u.rsa.mod_size = rsa->key_size;
+	op.u.rsa.input_len = i_len;
+
+	ret = cmd_q->ccp->vdata->perform->rsa(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);	/* hand mod_len result bytes to rsa->dst */
+
+e_dst:	/* the labels below release resources in reverse order of setup */
+	ccp_dm_free(&dst);
+
+e_src:
+	ccp_dm_free(&src);
+
+e_exp:
+	ccp_dm_free(&exp);
+
+e_sb:
+	if (sb_count)	/* only set when slots were allocated on a pre-5.0 device */
+		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
+
+	return ret;
+}
+
+static noinline_for_stack int
+ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{	/* Run the passthru request in cmd->u.passthru (scatterlist src/dst). */
+	struct ccp_passthru_engine *pt = &cmd->u.passthru;
+	struct ccp_dm_workarea mask;
+	struct ccp_data src, dst;
+	struct ccp_op op;
+	bool in_place = false;
+	unsigned int i;
+	int ret = 0;
+
+	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))	/* only a final request may be unaligned */
+		return -EINVAL;
+
+	if (!pt->src || !pt->dst)
+		return -EINVAL;
+
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {	/* a bitwise op needs a full-size mask */
+		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
+			return -EINVAL;
+		if (!pt->mask)
+			return -EINVAL;
+	}
+
+	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
+		/* Load the mask */
+		op.sb_key = cmd_q->sb_key;
+
+		ret = ccp_init_dm_workarea(&mask, cmd_q,
+					   CCP_PASSTHRU_SB_COUNT *
+					   CCP_SB_BYTES,
+					   DMA_TO_DEVICE);
+		if (ret)
+			return ret;
+
+		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
+		if (ret)
+			goto e_mask;
+		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
+				     CCP_PASSTHRU_BYTESWAP_NOOP);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_mask;
+		}
+	}
+
+	/* Prepare the input and output data workareas. For in-place
+	 * operations we need to set the dma direction to BIDIRECTIONAL
+	 * and copy the src workarea to the dst workarea.
+	 */
+	if (sg_virt(pt->src) == sg_virt(pt->dst))	/* same first buffer => in place */
+		in_place = true;
+
+	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
+			    CCP_PASSTHRU_MASKSIZE,
+			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (ret)
+		goto e_mask;
+
+	if (in_place) {
+		dst = src;	/* shares src's state; e_dst skips the separate free */
+	} else {
+		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
+				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Send data to the CCP Passthru engine
+	 *   Because the CCP engine works on a single source and destination
+	 *   dma address at a time, each entry in the source scatterlist
+	 *   (after the dma_map_sg call) must be less than or equal to the
+	 *   (remaining) length in the destination scatterlist entry and the
+	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
+	 */
+	dst.sg_wa.sg_used = 0;
+	for (i = 1; i <= src.sg_wa.dma_count; i++) {	/* one engine op per source entry */
+		if (!dst.sg_wa.sg ||
+		    (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) {
+			ret = -EINVAL;
+			goto e_dst;
+		}
+
+		if (i == src.sg_wa.dma_count) {	/* last entry: flag eom and soc */
+			op.eom = 1;
+			op.soc = 1;
+		}
+
+		op.src.type = CCP_MEMTYPE_SYSTEM;
+		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
+		op.src.u.dma.offset = 0;
+		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
+
+		op.dst.type = CCP_MEMTYPE_SYSTEM;
+		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
+		op.dst.u.dma.offset = dst.sg_wa.sg_used;	/* resume where the last op stopped */
+		op.dst.u.dma.length = op.src.u.dma.length;
+
+		ret = cmd_q->ccp->vdata->perform->passthru(&op);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			goto e_dst;
+		}
+
+		dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg);
+		if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) {	/* dst entry full: move on */
+			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
+			dst.sg_wa.sg_used = 0;
+		}
+		src.sg_wa.sg = sg_next(src.sg_wa.sg);
+	}
+
+e_dst:
+	if (!in_place)	/* in place, dst is a copy of src and is freed below */
+		ccp_free_data(&dst, cmd_q);
+
+e_src:
+	ccp_free_data(&src, cmd_q);
+
+e_mask:
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)	/* mask only exists for bitwise ops */
+		ccp_dm_free(&mask);
+
+	return ret;
+}
+
+static noinline_for_stack int
+ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
+				      struct ccp_cmd *cmd)
+{	/* Passthru using the caller-supplied DMA addresses in cmd->u.passthru_nomap. */
+	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
+	struct ccp_dm_workarea mask;
+	struct ccp_op op;
+	int ret;
+
+	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))	/* only a final request may be unaligned */
+		return -EINVAL;
+
+	if (!pt->src_dma || !pt->dst_dma)
+		return -EINVAL;
+
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {	/* a bitwise op needs a full-size mask */
+		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
+			return -EINVAL;
+		if (!pt->mask)
+			return -EINVAL;
+	}
+
+	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
+	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
+		/* Load the mask */
+		op.sb_key = cmd_q->sb_key;
+
+		mask.length = pt->mask_len;	/* only these three fields of mask are set here */
+		mask.dma.address = pt->mask;	/* pt->mask is used directly as a DMA address */
+		mask.dma.length = pt->mask_len;
+
+		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
+				     CCP_PASSTHRU_BYTESWAP_NOOP);
+		if (ret) {
+			cmd->engine_error = cmd_q->cmd_error;
+			return ret;
+		}
+	}
+
+	/* Send data to the CCP Passthru engine */
+	op.eom = 1;	/* the whole request is a single operation */
+	op.soc = 1;
+
+	op.src.type = CCP_MEMTYPE_SYSTEM;
+	op.src.u.dma.address = pt->src_dma;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = pt->src_len;
+
+	op.dst.type = CCP_MEMTYPE_SYSTEM;
+	op.dst.u.dma.address = pt->dst_dma;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = pt->src_len;	/* destination length mirrors the source */
+
+	ret = cmd_q->ccp->vdata->perform->passthru(&op);
+	if (ret)
+		cmd->engine_error = cmd_q->cmd_error;
+
+	return ret;
+}
+
+/* Run a modular-math ECC request (MMUL, MADD or MINV) from cmd->u.ecc. */
+static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
+	struct ccp_dm_workarea src, dst;
+	struct ccp_op op;
+	int ret;
+	u8 *save;
+
+	if (!ecc->u.mm.operand_1 ||
+	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)	/* MINV has one operand */
+		if (!ecc->u.mm.operand_2 ||
+		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+
+	if (!ecc->u.mm.result ||
+	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))	/* must hold a full result */
+		return -EINVAL;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
+	/* Concatenate the modulus and the operands. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted and placed in a
+	 * fixed length buffer.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	/* Save the workarea address since it is updated in order to perform
+	 * the concatenation
+	 */
+	save = src.address;
+
+	/* Copy the ECC modulus */
+	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
+	if (ret)
+		goto e_src;
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Copy the first operand */
+	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
+				      ecc->u.mm.operand_1_len);
+	if (ret)
+		goto e_src;
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
+		/* Copy the second operand */
+		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
+					      ecc->u.mm.operand_2_len);
+		if (ret)
+			goto e_src;
+	}
+
+	/* Restore the workarea address */
+	src.address = save;
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
+				   DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = src.length;
+	op.dst.u.dma.address = dst.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = dst.length;
+
+	op.u.ecc.function = cmd->u.ecc.function;
+
+	ret = cmd_q->ccp->vdata->perform->ecc(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ecc->ecc_result = le16_to_cpup(
+		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
+	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
+		ret = -EIO;	/* engine ran but did not report success */
+		goto e_dst;
+	}
+
+	/* Save the ECC result */
+	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
+				CCP_ECC_MODULUS_BYTES);
+
+e_dst:
+	ccp_dm_free(&dst);
+
+e_src:
+	src.address = save;	/* error exits arrive with the address advanced */
+	ccp_dm_free(&src);
+	return ret;
+}
+
+/* Run a point-math ECC request (PADD, PMUL or PDBL) from cmd->u.ecc. */
+static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
+	struct ccp_dm_workarea src, dst;
+	struct ccp_op op;
+	int ret;
+	u8 *save;
+
+	if (!ecc->u.pm.point_1.x ||
+	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
+	    !ecc->u.pm.point_1.y ||
+	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {	/* addition needs a second point */
+		if (!ecc->u.pm.point_2.x ||
+		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
+		    !ecc->u.pm.point_2.y ||
+		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+	} else {	/* the other functions need domain "a"; PMUL also a scalar */
+		if (!ecc->u.pm.domain_a ||
+		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
+			return -EINVAL;
+
+		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
+			if (!ecc->u.pm.scalar ||
+			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
+				return -EINVAL;
+	}
+
+	if (!ecc->u.pm.result.x ||
+	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
+	    !ecc->u.pm.result.y ||
+	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
+		return -EINVAL;
+
+	memset(&op, 0, sizeof(op));
+	op.cmd_q = cmd_q;
+	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
+
+	/* Concatenate the modulus and the operands. Both the modulus and
+	 * the operands must be in little endian format.  Since the input
+	 * is in big endian format it must be converted and placed in a
+	 * fixed length buffer.
+	 */
+	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
+				   DMA_TO_DEVICE);
+	if (ret)
+		return ret;
+
+	/* Save the workarea address since it is updated in order to perform
+	 * the concatenation
+	 */
+	save = src.address;
+
+	/* Copy the ECC modulus */
+	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
+	if (ret)
+		goto e_src;
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Copy the first point X and Y coordinate */
+	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
+				      ecc->u.pm.point_1.x_len);
+	if (ret)
+		goto e_src;
+	src.address += CCP_ECC_OPERAND_SIZE;
+	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
+				      ecc->u.pm.point_1.y_len);
+	if (ret)
+		goto e_src;
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	/* Set the first point Z coordinate to 1 */
+	*src.address = 0x01;
+	src.address += CCP_ECC_OPERAND_SIZE;
+
+	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
+		/* Copy the second point X and Y coordinate */
+		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
+					      ecc->u.pm.point_2.x_len);
+		if (ret)
+			goto e_src;
+		src.address += CCP_ECC_OPERAND_SIZE;
+		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
+					      ecc->u.pm.point_2.y_len);
+		if (ret)
+			goto e_src;
+		src.address += CCP_ECC_OPERAND_SIZE;
+
+		/* Set the second point Z coordinate to 1 */
+		*src.address = 0x01;
+		src.address += CCP_ECC_OPERAND_SIZE;
+	} else {
+		/* Copy the Domain "a" parameter */
+		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
+					      ecc->u.pm.domain_a_len);
+		if (ret)
+			goto e_src;
+		src.address += CCP_ECC_OPERAND_SIZE;
+
+		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
+			/* Copy the scalar value */
+			ret = ccp_reverse_set_dm_area(&src, 0,
+						      ecc->u.pm.scalar, 0,
+						      ecc->u.pm.scalar_len);
+			if (ret)
+				goto e_src;
+			src.address += CCP_ECC_OPERAND_SIZE;
+		}
+	}
+
+	/* Restore the workarea address */
+	src.address = save;
+
+	/* Prepare the output area for the operation */
+	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
+				   DMA_FROM_DEVICE);
+	if (ret)
+		goto e_src;
+
+	op.soc = 1;
+	op.src.u.dma.address = src.dma.address;
+	op.src.u.dma.offset = 0;
+	op.src.u.dma.length = src.length;
+	op.dst.u.dma.address = dst.dma.address;
+	op.dst.u.dma.offset = 0;
+	op.dst.u.dma.length = dst.length;
+
+	op.u.ecc.function = cmd->u.ecc.function;
+
+	ret = cmd_q->ccp->vdata->perform->ecc(&op);
+	if (ret) {
+		cmd->engine_error = cmd_q->cmd_error;
+		goto e_dst;
+	}
+
+	ecc->ecc_result = le16_to_cpup(
+		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
+	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
+		ret = -EIO;	/* engine ran but did not report success */
+		goto e_dst;
+	}
+
+	/* Save the workarea address since it is updated as we walk through
+	 * to copy the point math result
+	 */
+	save = dst.address;
+
+	/* Save the ECC result X and Y coordinates */
+	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
+				CCP_ECC_MODULUS_BYTES);
+	dst.address += CCP_ECC_OUTPUT_SIZE;
+	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
+				CCP_ECC_MODULUS_BYTES);
+
+	/* Restore the workarea address */
+	dst.address = save;
+
+e_dst:
+	ccp_dm_free(&dst);
+
+e_src:
+	src.address = save;	/* error exits arrive with the address advanced */
+	ccp_dm_free(&src);
+	return ret;
+}
+
+/* Validate the modulus shared by all ECC requests, then dispatch to the
+ * modular-math or point-math handler; unknown functions yield -EINVAL.
+ */
+static noinline_for_stack int
+ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	struct ccp_ecc_engine *req = &cmd->u.ecc;
+
+	req->ecc_result = 0;
+	if (!req->mod || req->mod_len > CCP_ECC_MODULUS_BYTES)
+		return -EINVAL;
+
+	switch (req->function) {
+	case CCP_ECC_FUNCTION_PADD_384BIT:
+	case CCP_ECC_FUNCTION_PMUL_384BIT:
+	case CCP_ECC_FUNCTION_PDBL_384BIT:
+		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
+	case CCP_ECC_FUNCTION_MMUL_384BIT:
+	case CCP_ECC_FUNCTION_MADD_384BIT:
+	case CCP_ECC_FUNCTION_MINV_384BIT:
+		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
+/* ccp_run_cmd - execute one command on a CCP command queue
+ * @cmd_q: queue the command is run on
+ * @cmd: command descriptor; cmd->engine selects the handler
+ *
+ * Clears the per-command error and interrupt state, refreshes the queue's
+ * free slot count, then hands @cmd to the handler for its engine (for AES
+ * also its mode, for passthru the NO_DMA_MAP flag).
+ *
+ * Returns the handler's result, or -EINVAL for an unknown engine.
+ */
+int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+{
+	/* Start every command with clean error/interrupt bookkeeping */
+	cmd->engine_error = 0;
+	cmd_q->cmd_error = 0;
+	cmd_q->int_rcvd = 0;
+	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
+
+	switch (cmd->engine) {
+	case CCP_ENGINE_AES:
+		if (cmd->u.aes.mode == CCP_AES_MODE_CMAC)
+			return ccp_run_aes_cmac_cmd(cmd_q, cmd);
+		if (cmd->u.aes.mode == CCP_AES_MODE_GCM)
+			return ccp_run_aes_gcm_cmd(cmd_q, cmd);
+		return ccp_run_aes_cmd(cmd_q, cmd);
+
+	case CCP_ENGINE_XTS_AES_128:
+		return ccp_run_xts_aes_cmd(cmd_q, cmd);
+
+	case CCP_ENGINE_DES3:
+		return ccp_run_des3_cmd(cmd_q, cmd);
+
+	case CCP_ENGINE_SHA:
+		return ccp_run_sha_cmd(cmd_q, cmd);
+
+	case CCP_ENGINE_RSA:
+		return ccp_run_rsa_cmd(cmd_q, cmd);
+
+	case CCP_ENGINE_PASSTHRU:
+		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
+			return ccp_run_passthru_nomap_cmd(cmd_q, cmd);
+		return ccp_run_passthru_cmd(cmd_q, cmd);
+
+	case CCP_ENGINE_ECC:
+		return ccp_run_ecc_cmd(cmd_q, cmd);
+
+	default:
+		return -EINVAL;
+	}
+}
diff --git a/drivers/crypto/ccp/psp-dev.c b/drivers/crypto/ccp/psp-dev.c
new file mode 100644
index 000000000..1603dc8d2
--- /dev/null
+++ b/drivers/crypto/ccp/psp-dev.c
@@ -0,0 +1,956 @@
+/*
+ * AMD Platform Security Processor (PSP) interface
+ *
+ * Copyright (C) 2016-2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/mutex.h>
+#include <linux/delay.h>
+#include <linux/hw_random.h>
+#include <linux/ccp.h>
+#include <linux/firmware.h>
+
+#include "sp-dev.h"
+#include "psp-dev.h"
+
+#define SEV_VERSION_GREATER_OR_EQUAL(_maj, _min) /* major first, then minor */ \
+		((psp_master->api_major) > (_maj) ||	\
+		 ((psp_master->api_major) == (_maj) && (psp_master->api_minor) >= (_min)))
+
+#define DEVICE_NAME	"sev"
+#define SEV_FW_FILE	"amd/sev.fw"	/* name handed to request_firmware() */
+
+static DEFINE_MUTEX(sev_cmd_mutex);	/* taken by the non-"_locked" SEV wrappers */
+static struct sev_misc_dev *misc_dev;
+static struct psp_device *psp_master;	/* device every SEV command in this file targets */
+
+static int psp_cmd_timeout = 100;	/* seconds; copied into psp_timeout after a completed command */
+module_param(psp_cmd_timeout, int, 0644);
+MODULE_PARM_DESC(psp_cmd_timeout, " default timeout value, in seconds, for PSP commands");
+
+static int psp_probe_timeout = 5;	/* seconds */
+module_param(psp_probe_timeout, int, 0644);
+MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe");
+
+static bool psp_dead;	/* set once a command times out; later commands fail with -EBUSY */
+static int psp_timeout;	/* timeout, in seconds, applied to the next SEV command */
+
+/* Allocate a zeroed, device-managed psp_device for @sp, link it back to @sp
+ * and name it "psp-<ord>". Returns NULL when the allocation fails.
+ */
+static struct psp_device *psp_alloc_struct(struct sp_device *sp)
+{
+	struct psp_device *new_psp;
+
+	new_psp = devm_kzalloc(sp->dev, sizeof(*new_psp), GFP_KERNEL);
+	if (new_psp) {
+		snprintf(new_psp->name, sizeof(new_psp->name), "psp-%u", sp->ord);
+		new_psp->sp = sp;
+		new_psp->dev = sp->dev;
+	}
+
+	return new_psp;
+}
+
+/* Interrupt handler for the PSP: on a command-complete interrupt whose
+ * response register has the RESP bit set, flag SEV completion and wake
+ * the waiter. The status bits that were read are always written back.
+ */
+static irqreturn_t psp_irq_handler(int irq, void *data)
+{
+	struct psp_device *psp = data;
+	unsigned int intsts;
+	int resp;
+
+	intsts = ioread32(psp->io_regs + psp->vdata->intsts_reg);
+
+	if (intsts & PSP_CMD_COMPLETE) {
+		/* Only an SEV command response is of interest here */
+		resp = ioread32(psp->io_regs + psp->vdata->cmdresp_reg);
+		if (resp & PSP_CMDRESP_RESP) {
+			psp->sev_int_rcvd = 1;
+			wake_up(&psp->sev_int_queue);
+		}
+	}
+
+	/* Clear the interrupt status by writing back the value we read */
+	iowrite32(intsts, psp->io_regs + psp->vdata->intsts_reg);
+
+	return IRQ_HANDLED;
+}
+
+/* Sleep until the IRQ handler flags SEV completion or @timeout seconds pass;
+ * on completion store the command response register in *@reg.
+ */
+static int sev_wait_cmd_ioc(struct psp_device *psp,
+			    unsigned int *reg, unsigned int timeout)
+{
+	int woken = wait_event_timeout(psp->sev_int_queue,
+				       psp->sev_int_rcvd, timeout * HZ);
+
+	if (woken == 0)
+		return -ETIMEDOUT;
+	*reg = ioread32(psp->io_regs + psp->vdata->cmdresp_reg);
+	return 0;
+}
+
+static int sev_cmd_buffer_len(int cmd)
+{	/* Size in bytes of the command buffer for @cmd; 0 if @cmd is not listed. */
+	switch (cmd) {
+	case SEV_CMD_INIT:			return sizeof(struct sev_data_init);
+	case SEV_CMD_PLATFORM_STATUS:		return sizeof(struct sev_user_data_status);
+	case SEV_CMD_PEK_CSR:			return sizeof(struct sev_data_pek_csr);
+	case SEV_CMD_PEK_CERT_IMPORT:		return sizeof(struct sev_data_pek_cert_import);
+	case SEV_CMD_PDH_CERT_EXPORT:		return sizeof(struct sev_data_pdh_cert_export);
+	case SEV_CMD_LAUNCH_START:		return sizeof(struct sev_data_launch_start);
+	case SEV_CMD_LAUNCH_UPDATE_DATA:	return sizeof(struct sev_data_launch_update_data);
+	case SEV_CMD_LAUNCH_UPDATE_VMSA:	return sizeof(struct sev_data_launch_update_vmsa);
+	case SEV_CMD_LAUNCH_FINISH:		return sizeof(struct sev_data_launch_finish);
+	case SEV_CMD_LAUNCH_MEASURE:		return sizeof(struct sev_data_launch_measure);
+	case SEV_CMD_ACTIVATE:			return sizeof(struct sev_data_activate);
+	case SEV_CMD_DEACTIVATE:		return sizeof(struct sev_data_deactivate);
+	case SEV_CMD_DECOMMISSION:		return sizeof(struct sev_data_decommission);
+	case SEV_CMD_GUEST_STATUS:		return sizeof(struct sev_data_guest_status);
+	case SEV_CMD_DBG_DECRYPT:		return sizeof(struct sev_data_dbg);
+	case SEV_CMD_DBG_ENCRYPT:		return sizeof(struct sev_data_dbg);
+	case SEV_CMD_SEND_START:		return sizeof(struct sev_data_send_start);
+	case SEV_CMD_SEND_UPDATE_DATA:		return sizeof(struct sev_data_send_update_data);
+	case SEV_CMD_SEND_UPDATE_VMSA:		return sizeof(struct sev_data_send_update_vmsa);
+	case SEV_CMD_SEND_FINISH:		return sizeof(struct sev_data_send_finish);
+	case SEV_CMD_RECEIVE_START:		return sizeof(struct sev_data_receive_start);
+	case SEV_CMD_RECEIVE_FINISH:		return sizeof(struct sev_data_receive_finish);
+	case SEV_CMD_RECEIVE_UPDATE_DATA:	return sizeof(struct sev_data_receive_update_data);
+	case SEV_CMD_RECEIVE_UPDATE_VMSA:	return sizeof(struct sev_data_receive_update_vmsa);
+	case SEV_CMD_LAUNCH_UPDATE_SECRET:	return sizeof(struct sev_data_launch_secret);
+	case SEV_CMD_DOWNLOAD_FIRMWARE:		return sizeof(struct sev_data_download_firmware);
+	case SEV_CMD_GET_ID:			return sizeof(struct sev_data_get_id);
+	default:				return 0;	/* commands not listed above */
+	}
+
+	return 0;	/* not reached: every switch arm returns */
+}
+
+/* Issue SEV command @cmd with command buffer @data (may be NULL) and wait
+ * for completion. If @psp_ret is set it receives the firmware error bits
+ * (0 on timeout). Returns 0, -ENODEV, -EBUSY, -ETIMEDOUT or -EIO.
+ */
+static int __sev_do_cmd_locked(int cmd, void *data, int *psp_ret)
+{
+	struct psp_device *psp = psp_master;
+	unsigned int phys_lsb, phys_msb, reg;
+	int ret = 0;	/* signed: carries negative errno values */
+
+	if (!psp)
+		return -ENODEV;
+
+	if (psp_dead)
+		return -EBUSY;
+
+	/* Get the physical address of the command buffer */
+	phys_lsb = data ? lower_32_bits(__psp_pa(data)) : 0;
+	phys_msb = data ? upper_32_bits(__psp_pa(data)) : 0;
+
+	dev_dbg(psp->dev, "sev command id %#x buffer 0x%08x%08x timeout %us\n",
+		cmd, phys_msb, phys_lsb, psp_timeout);
+	print_hex_dump_debug("(in):  ", DUMP_PREFIX_OFFSET, 16, 2, data,
+			     sev_cmd_buffer_len(cmd), false);
+
+	iowrite32(phys_lsb, psp->io_regs + psp->vdata->cmdbuff_addr_lo_reg);
+	iowrite32(phys_msb, psp->io_regs + psp->vdata->cmdbuff_addr_hi_reg);
+
+	psp->sev_int_rcvd = 0;
+
+	reg = cmd;
+	reg <<= PSP_CMDRESP_CMD_SHIFT;
+	reg |= PSP_CMDRESP_IOC;
+	iowrite32(reg, psp->io_regs + psp->vdata->cmdresp_reg);
+
+	/* wait for command completion */
+	ret = sev_wait_cmd_ioc(psp, &reg, psp_timeout);
+	if (ret) {
+		if (psp_ret)
+			*psp_ret = 0;
+		dev_err(psp->dev, "sev command %#x timed out, disabling PSP \n", cmd);
+		psp_dead = true;	/* every later command now fails with -EBUSY */
+		return ret;
+	}
+
+	psp_timeout = psp_cmd_timeout;
+	if (psp_ret)
+		*psp_ret = reg & PSP_CMDRESP_ERR_MASK;
+
+	if (reg & PSP_CMDRESP_ERR_MASK) {
+		dev_dbg(psp->dev, "sev command %#x failed (%#010x)\n",
+			cmd, reg & PSP_CMDRESP_ERR_MASK);
+		ret = -EIO;
+	}
+
+	print_hex_dump_debug("(out): ", DUMP_PREFIX_OFFSET, 16, 2, data,
+			     sev_cmd_buffer_len(cmd), false);
+
+	return ret;
+}
+
+/* Locked wrapper: run __sev_do_cmd_locked() under sev_cmd_mutex. */
+static int sev_do_cmd(int cmd, void *data, int *psp_ret)
+{
+	int status;
+
+	mutex_lock(&sev_cmd_mutex);
+	status = __sev_do_cmd_locked(cmd, data, psp_ret);
+	mutex_unlock(&sev_cmd_mutex);
+	return status;
+}
+
+/* Send SEV_CMD_INIT unless the cached state is already SEV_STATE_INIT and
+ * record the new state on success; -ENODEV when there is no PSP device.
+ */
+static int __sev_platform_init_locked(int *error)
+{
+	struct psp_device *master = psp_master;
+	int status;
+
+	if (!master)
+		return -ENODEV;
+	if (master->sev_state == SEV_STATE_INIT)
+		return 0;
+
+	status = __sev_do_cmd_locked(SEV_CMD_INIT, &master->init_cmd_buf, error);
+	if (status == 0) {
+		master->sev_state = SEV_STATE_INIT;
+		dev_dbg(master->dev, "SEV firmware initialized\n");
+	}
+	return status;
+}
+
+/* Exported entry point: initialize the SEV platform under sev_cmd_mutex. */
+int sev_platform_init(int *error)
+{
+	int status;
+
+	mutex_lock(&sev_cmd_mutex);
+	status = __sev_platform_init_locked(error);
+	mutex_unlock(&sev_cmd_mutex);
+	return status;
+}
+EXPORT_SYMBOL_GPL(sev_platform_init);
+
+/* Send SEV_CMD_SHUTDOWN; on success mark the cached state SEV_STATE_UNINIT. */
+static int __sev_platform_shutdown_locked(int *error)
+{
+	int status = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error);
+
+	if (!status) {
+		/* Only touch psp_master once the command has gone through */
+		psp_master->sev_state = SEV_STATE_UNINIT;
+		dev_dbg(psp_master->dev, "SEV firmware shutdown\n");
+	}
+
+	return status;
+}
+
+static int sev_platform_shutdown(int *error)
+{
+	int rc;
+
+	mutex_lock(&sev_cmd_mutex);
+	rc = __sev_platform_shutdown_locked(NULL);
+	mutex_unlock(&sev_cmd_mutex);
+
+	return rc;
+}
+
+/* Query PLATFORM_STATUS and, on success, store the reported state in *@state. */
+static int sev_get_platform_state(int *state, int *error)
+{
+	struct sev_user_data_status *status = &psp_master->status_cmd_buf;
+	int err;
+
+	err = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, status, error);
+	if (!err)
+		*state = status->state;
+
+	return err;
+}
+
+/*
+ * FACTORY_RESET ioctl. Per the SEV spec the command must be issued from
+ * the UNINIT state, so the current platform state is queried first:
+ *  - WORKING (a guest may be active): refuse with -EBUSY;
+ *  - INIT: issue SHUTDOWN to go INIT -> UNINIT, then reset;
+ *  - anything else: reset directly.
+ */
+static int sev_ioctl_do_reset(struct sev_issue_cmd *argp)
+{
+	int cur_state, err;
+
+	err = sev_get_platform_state(&cur_state, &argp->error);
+	if (err)
+		return err;
+
+	switch (cur_state) {
+	case SEV_STATE_WORKING:
+		return -EBUSY;
+	case SEV_STATE_INIT:
+		err = __sev_platform_shutdown_locked(&argp->error);
+		if (err)
+			return err;
+		break;
+	default:
+		break;
+	}
+	return __sev_do_cmd_locked(SEV_CMD_FACTORY_RESET, NULL, &argp->error);
+}
+
+/* PLATFORM_STATUS ioctl: run the command and copy the status to userspace. */
+static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
+{
+	struct sev_user_data_status *buf = &psp_master->status_cmd_buf;
+	void __user *uptr = (void __user *)argp->data;
+	int err;
+
+	err = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, buf, &argp->error);
+	if (err)
+		return err;
+
+	/* A fault while copying out is the only failure left at this point */
+	return copy_to_user(uptr, buf, sizeof(*buf)) ? -EFAULT : 0;
+}
+
+/* Run buffer-less command @cmd, initializing the platform first if UNINIT. */
+static int sev_ioctl_do_pek_pdh_gen(int cmd, struct sev_issue_cmd *argp)
+{
+	int err = 0;
+
+	if (psp_master->sev_state == SEV_STATE_UNINIT)
+		err = __sev_platform_init_locked(&argp->error);
+	if (err)
+		return err;
+
+	return __sev_do_cmd_locked(cmd, NULL, &argp->error);
+}
+
+/* PEK_CSR ioctl: a zero address or length only queries the CSR length;
+ * otherwise the CSR is written to a kernel buffer and copied to the user.
+ */
+static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp)
+{
+	struct sev_user_data_pek_csr input;
+	struct sev_data_pek_csr *data;
+	void *blob = NULL;
+	int ret;
+
+	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+		return -EFAULT;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* userspace wants to query CSR length */
+	if (!input.address || !input.length)
+		goto cmd;
+
+	if (!access_ok(VERIFY_WRITE, input.address, input.length) ||
+	    input.length > SEV_FW_BLOB_MAX_SIZE) {
+		ret = -EFAULT;
+		goto e_free;
+	}
+
+	/* Zeroed bounce buffer: unwritten bytes must not leak stale heap data */
+	blob = kzalloc(input.length, GFP_KERNEL);
+	if (!blob) {
+		ret = -ENOMEM;
+		goto e_free;
+	}
+
+	data->address = __psp_pa(blob);
+	data->len = input.length;
+
+cmd:
+	if (psp_master->sev_state == SEV_STATE_UNINIT) {
+		ret = __sev_platform_init_locked(&argp->error);
+		if (ret)
+			goto e_free_blob;
+	}
+
+	ret = __sev_do_cmd_locked(SEV_CMD_PEK_CSR, data, &argp->error);
+	/* If we query the CSR length, FW responded with expected data. */
+	input.length = data->len;
+	if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
+		ret = -EFAULT;
+		goto e_free_blob;
+	}
+
+	/* On failure data->len may exceed the blob, so copy only on success */
+	if (blob && !ret &&
+	    copy_to_user((void __user *)input.address, blob, input.length))
+		ret = -EFAULT;
+e_free_blob:
+	kfree(blob);
+e_free:
+	kfree(data);
+	return ret;
+}
+
+/*
+ * Duplicate a userspace blob into kernel memory.
+ *
+ * Returns the result of memdup_user(), or ERR_PTR(-EINVAL) when @uaddr is
+ * zero, @len is zero, or @len exceeds SEV_FW_BLOB_MAX_SIZE.
+ */
+void *psp_copy_user_blob(u64 __user uaddr, u32 len)
+{
+	/* reject empty requests and blobs that exceed our limit */
+	if (!uaddr || !len || len > SEV_FW_BLOB_MAX_SIZE)
+		return ERR_PTR(-EINVAL);
+
+	return memdup_user((void __user *)(uintptr_t)uaddr, len);
+}
+EXPORT_SYMBOL_GPL(psp_copy_user_blob);
+
+/*
+ * Query the platform status and cache the firmware API major/minor and
+ * build numbers in psp_master.
+ *
+ * Returns 0 on success, 1 if the status query failed.
+ */
+static int sev_get_api_version(void)
+{
+	struct sev_user_data_status *st = &psp_master->status_cmd_buf;
+	int fw_error, rc;
+
+	rc = sev_platform_status(st, &fw_error);
+	if (rc) {
+		dev_err(psp_master->dev,
+			"SEV: failed to get status. Error: %#x\n", fw_error);
+		return 1;
+	}
+
+	/* remember what the firmware reported for later version checks */
+	psp_master->build = st->build;
+	psp_master->api_minor = st->api_minor;
+	psp_master->api_major = st->api_major;
+
+	return 0;
+}
+
+/*
+ * Try to load the SEV firmware image named by SEV_FW_FILE into the PSP
+ * using the DOWNLOAD_FIRMWARE command.
+ *
+ * Don't fail if SEV FW couldn't be updated. Continue with existing SEV FW.
+ *
+ * Returns 0 when the command succeeded, -1 when the image could not be
+ * requested or the staging pages could not be allocated, otherwise the
+ * value returned by sev_do_cmd().
+ */
+static int sev_update_firmware(struct device *dev)
+{
+	struct sev_data_download_firmware *data;
+	const struct firmware *firmware;
+	int ret, error, order;
+	struct page *p;
+	u64 data_size;
+
+	ret = request_firmware(&firmware, SEV_FW_FILE, dev);
+	if (ret < 0)
+		return -1;
+
+	/*
+	 * SEV FW expects the physical address given to it to be 32
+	 * byte aligned. Memory allocated has structure placed at the
+	 * beginning followed by the firmware being passed to the SEV
+	 * FW. Allocate enough memory for data structure + alignment
+	 * padding + SEV FW.
+	 */
+	data_size = ALIGN(sizeof(struct sev_data_download_firmware), 32);
+
+	order = get_order(firmware->size + data_size);
+	p = alloc_pages(GFP_KERNEL, order);
+	if (!p) {
+		ret = -1;
+		goto fw_err;
+	}
+
+	/*
+	 * Copy firmware data to a kernel allocated contiguous
+	 * memory region.
+	 */
+	data = page_address(p);
+	memcpy(page_address(p) + data_size, firmware->data, firmware->size);
+
+	/* the command buffer points at the image placed right behind it */
+	data->address = __psp_pa(page_address(p) + data_size);
+	data->len = firmware->size;
+
+	ret = sev_do_cmd(SEV_CMD_DOWNLOAD_FIRMWARE, data, &error);
+	if (ret)
+		dev_dbg(dev, "Failed to update SEV firmware: %#x\n", error);
+	else
+		dev_info(dev, "SEV firmware update successful\n");
+
+	__free_pages(p, order);
+
+fw_err:
+	release_firmware(firmware);
+
+	return ret;
+}
+
+/*
+ * SEV_PEK_CERT_IMPORT ioctl handler: copy the PEK and OCA certificate
+ * blobs described by the user's struct sev_user_data_pek_cert_import into
+ * kernel memory, bring the platform into the INIT state if necessary and
+ * issue the PEK_CERT_IMPORT command.
+ */
+static int sev_ioctl_do_pek_import(struct sev_issue_cmd *argp)
+{
+	struct sev_user_data_pek_cert_import params;
+	struct sev_data_pek_cert_import *cmd_buf;
+	void *pek, *oca;
+	int rc;
+
+	if (copy_from_user(&params, (void __user *)argp->data, sizeof(params)))
+		return -EFAULT;
+
+	cmd_buf = kzalloc(sizeof(*cmd_buf), GFP_KERNEL);
+	if (!cmd_buf)
+		return -ENOMEM;
+
+	/* bring the PEK certificate blob into kernel memory */
+	pek = psp_copy_user_blob(params.pek_cert_address, params.pek_cert_len);
+	if (IS_ERR(pek)) {
+		rc = PTR_ERR(pek);
+		goto out_free_cmd;
+	}
+
+	cmd_buf->pek_cert_address = __psp_pa(pek);
+	cmd_buf->pek_cert_len = params.pek_cert_len;
+
+	/* bring the OCA certificate blob into kernel memory */
+	oca = psp_copy_user_blob(params.oca_cert_address, params.oca_cert_len);
+	if (IS_ERR(oca)) {
+		rc = PTR_ERR(oca);
+		goto out_free_pek;
+	}
+
+	cmd_buf->oca_cert_address = __psp_pa(oca);
+	cmd_buf->oca_cert_len = params.oca_cert_len;
+
+	/* the command needs the platform in INIT state; move it there */
+	if (psp_master->sev_state != SEV_STATE_INIT) {
+		rc = __sev_platform_init_locked(&argp->error);
+		if (rc)
+			goto out_free_oca;
+	}
+
+	rc = __sev_do_cmd_locked(SEV_CMD_PEK_CERT_IMPORT, cmd_buf, &argp->error);
+
+out_free_oca:
+	kfree(oca);
+out_free_pek:
+	kfree(pek);
+out_free_cmd:
+	kfree(cmd_buf);
+	return rc;
+}
+
+/*
+ * SEV_GET_ID ioctl handler: issue the GET_ID command and copy the ID the
+ * firmware produced to the user address held in argp->data.
+ *
+ * Returns -ENOTSUPP when the cached firmware API version is below 0.16.
+ */
+static int sev_ioctl_do_get_id(struct sev_issue_cmd *argp)
+{
+	struct sev_data_get_id *cmd_buf;
+	u64 hdr_size, id_size;
+	void *id, *buf;
+	int rc;
+
+	/* SEV GET_ID available from SEV API v0.16 and up */
+	if (!SEV_VERSION_GREATER_OR_EQUAL(0, 16))
+		return -ENOTSUPP;
+
+	/*
+	 * The firmware wants the ID buffer 8-byte aligned, so one
+	 * allocation holds the command structure, padded up to an
+	 * 8-byte multiple, followed by the area the firmware fills
+	 * with the ID.
+	 */
+	hdr_size = ALIGN(sizeof(struct sev_data_get_id), 8);
+	id_size = sizeof(struct sev_user_data_get_id);
+
+	buf = kzalloc(hdr_size + id_size, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	cmd_buf = buf;
+	id = buf + hdr_size;
+
+	cmd_buf->address = __psp_pa(id);
+	cmd_buf->len = id_size;
+
+	rc = __sev_do_cmd_locked(SEV_CMD_GET_ID, cmd_buf, &argp->error);
+	if (!rc && copy_to_user((void __user *)argp->data, id, cmd_buf->len))
+		rc = -EFAULT;
+
+	kfree(buf);
+
+	return rc;
+}
+
+/*
+ * SEV_PDH_CERT_EXPORT ioctl handler.
+ *
+ * Reads a struct sev_user_data_pdh_cert_export from argp->data. When any
+ * of the PDH address, PDH length or certificate chain address is zero the
+ * command is issued with empty buffers so that the firmware reports the
+ * required lengths. Otherwise kernel bounce buffers of the requested sizes
+ * are handed to the firmware and copied back to the user buffers.
+ *
+ * The (possibly updated) lengths are always written back to argp->data.
+ *
+ * Returns 0 on success, -EFAULT on a bad user pointer or oversized
+ * request, -ENOMEM on allocation failure, or the command's error code.
+ */
+static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp)
+{
+	struct sev_user_data_pdh_cert_export input;
+	void *pdh_blob = NULL, *cert_blob = NULL;
+	size_t pdh_alloc = 0, cert_alloc = 0;
+	struct sev_data_pdh_cert_export *data;
+	int ret;
+
+	if (copy_from_user(&input, (void __user *)argp->data, sizeof(input)))
+		return -EFAULT;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	/* Userspace wants to query the certificate length. */
+	if (!input.pdh_cert_address ||
+	    !input.pdh_cert_len ||
+	    !input.cert_chain_address)
+		goto cmd;
+
+	/* The PDH user buffer must be writable and within the blob limit. */
+	if ((input.pdh_cert_len > SEV_FW_BLOB_MAX_SIZE) ||
+	    !access_ok(VERIFY_WRITE, input.pdh_cert_address, input.pdh_cert_len)) {
+		ret = -EFAULT;
+		goto e_free;
+	}
+
+	/* The same applies to the certificate chain user buffer. */
+	if ((input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE) ||
+	    !access_ok(VERIFY_WRITE, input.cert_chain_address, input.cert_chain_len)) {
+		ret = -EFAULT;
+		goto e_free;
+	}
+
+	/*
+	 * Allocate a physically contiguous buffer to store the PDH blob.
+	 * Both bounce buffers are zeroed because they are copied to
+	 * userspace even when the firmware command fails.
+	 */
+	pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL);
+	if (!pdh_blob) {
+		ret = -ENOMEM;
+		goto e_free;
+	}
+
+	pdh_alloc = input.pdh_cert_len;
+	data->pdh_cert_address = __psp_pa(pdh_blob);
+	data->pdh_cert_len = input.pdh_cert_len;
+
+	/* Allocate a physically contiguous buffer to store the cert chain blob. */
+	cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL);
+	if (!cert_blob) {
+		ret = -ENOMEM;
+		goto e_free_pdh;
+	}
+
+	cert_alloc = input.cert_chain_len;
+	data->cert_chain_address = __psp_pa(cert_blob);
+	data->cert_chain_len = input.cert_chain_len;
+
+cmd:
+	/* If platform is not in INIT state then transition it to INIT. */
+	if (psp_master->sev_state != SEV_STATE_INIT) {
+		ret = __sev_platform_init_locked(&argp->error);
+		if (ret)
+			goto e_free_cert;
+	}
+
+	ret = __sev_do_cmd_locked(SEV_CMD_PDH_CERT_EXPORT, data, &argp->error);
+
+	/* If we query the length, FW responded with expected data. */
+	input.cert_chain_len = data->cert_chain_len;
+	input.pdh_cert_len = data->pdh_cert_len;
+
+	if (copy_to_user((void __user *)argp->data, &input, sizeof(input))) {
+		ret = -EFAULT;
+		goto e_free_cert;
+	}
+
+	/*
+	 * The firmware may report lengths larger than the buffers we
+	 * allocated; never read past the end of our allocations.
+	 */
+	if (pdh_blob) {
+		size_t copy_len = input.pdh_cert_len;
+
+		if (copy_len > pdh_alloc)
+			copy_len = pdh_alloc;
+
+		if (copy_to_user((void __user *)input.pdh_cert_address,
+				 pdh_blob, copy_len)) {
+			ret = -EFAULT;
+			goto e_free_cert;
+		}
+	}
+
+	if (cert_blob) {
+		size_t copy_len = input.cert_chain_len;
+
+		if (copy_len > cert_alloc)
+			copy_len = cert_alloc;
+
+		if (copy_to_user((void __user *)input.cert_chain_address,
+				 cert_blob, copy_len))
+			ret = -EFAULT;
+	}
+
+e_free_cert:
+	kfree(cert_blob);
+e_free_pdh:
+	kfree(pdh_blob);
+e_free:
+	kfree(data);
+	return ret;
+}
+
+/*
+ * ioctl entry point of the SEV misc device.
+ *
+ * Only SEV_ISSUE_CMD is accepted. The struct sev_issue_cmd is read from
+ * userspace, dispatched to the matching handler while holding
+ * sev_cmd_mutex, and written back so that userspace sees the error field
+ * the handler filled in.
+ */
+static long sev_ioctl(struct file *file, unsigned int ioctl, unsigned long arg)
+{
+	void __user *uarg = (void __user *)arg;
+	struct sev_issue_cmd req;
+	int rc = -EFAULT;
+
+	if (!psp_master)
+		return -ENODEV;
+
+	if (ioctl != SEV_ISSUE_CMD)
+		return -EINVAL;
+
+	if (copy_from_user(&req, uarg, sizeof(req)))
+		return -EFAULT;
+
+	if (req.cmd > SEV_MAX)
+		return -EINVAL;
+
+	mutex_lock(&sev_cmd_mutex);
+
+	switch (req.cmd) {
+	case SEV_FACTORY_RESET:
+		rc = sev_ioctl_do_reset(&req);
+		break;
+	case SEV_PLATFORM_STATUS:
+		rc = sev_ioctl_do_platform_status(&req);
+		break;
+	case SEV_PEK_GEN:
+		rc = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PEK_GEN, &req);
+		break;
+	case SEV_PDH_GEN:
+		rc = sev_ioctl_do_pek_pdh_gen(SEV_CMD_PDH_GEN, &req);
+		break;
+	case SEV_PEK_CSR:
+		rc = sev_ioctl_do_pek_csr(&req);
+		break;
+	case SEV_PEK_CERT_IMPORT:
+		rc = sev_ioctl_do_pek_import(&req);
+		break;
+	case SEV_PDH_CERT_EXPORT:
+		rc = sev_ioctl_do_pdh_export(&req);
+		break;
+	case SEV_GET_ID:
+		rc = sev_ioctl_do_get_id(&req);
+		break;
+	default:
+		/* unknown command: nothing to report back to userspace */
+		rc = -EINVAL;
+		goto unlock;
+	}
+
+	/* return the request, including the handler's error code */
+	if (copy_to_user(uarg, &req, sizeof(req)))
+		rc = -EFAULT;
+unlock:
+	mutex_unlock(&sev_cmd_mutex);
+
+	return rc;
+}
+
+/*
+ * File operations of the SEV misc device; only ioctl is implemented.
+ * sev_issue_cmd_external_user() compares a file's f_op against this table
+ * to recognise SEV device files.
+ */
+static const struct file_operations sev_fops = {
+	.owner	= THIS_MODULE,
+	.unlocked_ioctl = sev_ioctl,
+};
+
+/* Issue the PLATFORM_STATUS command, storing the result in @data. */
+int sev_platform_status(struct sev_user_data_status *data, int *error)
+{
+	return sev_do_cmd(SEV_CMD_PLATFORM_STATUS, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_platform_status);
+
+/* Issue the DEACTIVATE command with the command buffer @data. */
+int sev_guest_deactivate(struct sev_data_deactivate *data, int *error)
+{
+	return sev_do_cmd(SEV_CMD_DEACTIVATE, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_deactivate);
+
+/* Issue the ACTIVATE command with the command buffer @data. */
+int sev_guest_activate(struct sev_data_activate *data, int *error)
+{
+	return sev_do_cmd(SEV_CMD_ACTIVATE, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_activate);
+
+/* Issue the DECOMMISSION command with the command buffer @data. */
+int sev_guest_decommission(struct sev_data_decommission *data, int *error)
+{
+	return sev_do_cmd(SEV_CMD_DECOMMISSION, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_decommission);
+
+/* Issue the DF_FLUSH command; it takes no command buffer. */
+int sev_guest_df_flush(int *error)
+{
+	return sev_do_cmd(SEV_CMD_DF_FLUSH, NULL, error);
+}
+EXPORT_SYMBOL_GPL(sev_guest_df_flush);
+
+/*
+ * kref release callback for the shared SEV misc device: runs when the
+ * last PSP device drops its reference.
+ */
+static void sev_exit(struct kref *ref)
+{
+	struct sev_misc_dev *sev_misc = container_of(ref, struct sev_misc_dev, refcount);
+
+	misc_deregister(&sev_misc->misc);
+
+	/*
+	 * Forget the deregistered device so that a later probe registers
+	 * a fresh misc device instead of taking a reference on this one.
+	 * (The local is deliberately not called misc_dev: that name would
+	 * shadow the file-scope pointer being cleared here.)
+	 */
+	misc_dev = NULL;
+}
+
+/*
+ * Attach @psp to the shared SEV misc device, registering the device on
+ * first use and taking an additional reference otherwise.
+ *
+ * Returns 0 on success, -ENOMEM if the misc device structure cannot be
+ * allocated, or the error returned by misc_register().
+ */
+static int sev_misc_init(struct psp_device *psp)
+{
+	struct device *dev = psp->dev;
+	int ret;
+
+	/*
+	 * SEV feature support can be detected on multiple devices but the SEV
+	 * FW commands must be issued on the master. During probe, we do not
+	 * know the master hence we create /dev/sev on the first device probe.
+	 * sev_do_cmd() finds the right master device to which to issue the
+	 * command to the firmware.
+	 */
+	if (!misc_dev) {
+		struct miscdevice *misc;
+
+		misc_dev = devm_kzalloc(dev, sizeof(*misc_dev), GFP_KERNEL);
+		if (!misc_dev)
+			return -ENOMEM;
+
+		misc = &misc_dev->misc;
+		misc->minor = MISC_DYNAMIC_MINOR;
+		misc->name = DEVICE_NAME;
+		misc->fops = &sev_fops;
+
+		ret = misc_register(misc);
+		if (ret) {
+			/*
+			 * Do not leave an unregistered device with an
+			 * uninitialized refcount behind: the next probe
+			 * would otherwise take the kref_get() branch.
+			 */
+			devm_kfree(dev, misc_dev);
+			misc_dev = NULL;
+			return ret;
+		}
+
+		kref_init(&misc_dev->refcount);
+	} else {
+		kref_get(&misc_dev->refcount);
+	}
+
+	init_waitqueue_head(&psp->sev_int_queue);
+	psp->sev_misc = misc_dev;
+	dev_dbg(dev, "registered SEV device\n");
+
+	return 0;
+}
+
+/*
+ * Set up SEV support for @psp.
+ *
+ * Returns 1 when bit 0 of the feature register is clear (no SEV support),
+ * otherwise the result of sev_misc_init().
+ */
+static int sev_init(struct psp_device *psp)
+{
+	unsigned int features;
+
+	features = ioread32(psp->io_regs + psp->vdata->feature_reg);
+
+	/* bit 0 set: the device advertises the SEV feature */
+	if (features & 1)
+		return sev_misc_init(psp);
+
+	dev_dbg(psp->dev, "device does not support SEV\n");
+	return 1;
+}
+
+/*
+ * Initialize the PSP part of the secure processor @sp: allocate the
+ * psp_device, mask and clear its interrupts, request the PSP interrupt,
+ * set up SEV, offer the device as PSP master and finally enable
+ * interrupts.
+ *
+ * Returns 0 on success. On failure sp->psp_data is reset to NULL and a
+ * non-zero value is returned (a negative errno, or the non-zero result of
+ * sev_init()).
+ */
+int psp_dev_init(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	struct psp_device *psp;
+	int ret;
+
+	ret = -ENOMEM;
+	psp = psp_alloc_struct(sp);
+	if (!psp)
+		goto e_err;
+
+	sp->psp_data = psp;
+
+	psp->vdata = (struct psp_vdata *)sp->dev_vdata->psp_vdata;
+	if (!psp->vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	psp->io_regs = sp->io_map;
+
+	/* Disable and clear interrupts until ready */
+	iowrite32(0, psp->io_regs + psp->vdata->inten_reg);
+	iowrite32(-1, psp->io_regs + psp->vdata->intsts_reg);
+
+	/* Request an irq */
+	ret = sp_request_psp_irq(psp->sp, psp_irq_handler, psp->name, psp);
+	if (ret) {
+		dev_err(dev, "psp: unable to allocate an IRQ\n");
+		goto e_err;
+	}
+
+	/* any non-zero result, including "no SEV support", aborts the init */
+	ret = sev_init(psp);
+	if (ret)
+		goto e_irq;
+
+	/* let the bus code decide whether this device becomes the master */
+	if (sp->set_psp_master_device)
+		sp->set_psp_master_device(sp);
+
+	/* Enable interrupt */
+	iowrite32(-1, psp->io_regs + psp->vdata->inten_reg);
+
+	dev_notice(dev, "psp enabled\n");
+
+	return 0;
+
+e_irq:
+	sp_free_psp_irq(psp->sp, psp);
+e_err:
+	sp->psp_data = NULL;
+
+	dev_notice(dev, "psp initialization failed\n");
+
+	return ret;
+}
+
+/*
+ * Tear down the PSP part of @sp: drop this device's reference on the
+ * shared SEV misc device and release the PSP interrupt. Does nothing if
+ * the PSP was never initialized.
+ */
+void psp_dev_destroy(struct sp_device *sp)
+{
+	struct psp_device *psp = sp->psp_data;
+
+	if (psp) {
+		/* the last reference dropped deregisters the misc device */
+		if (psp->sev_misc)
+			kref_put(&misc_dev->refcount, sev_exit);
+
+		sp_free_psp_irq(sp, psp);
+	}
+}
+
+/*
+ * Issue a SEV command on behalf of an external user that proves its
+ * access by passing an open file of the SEV device.
+ *
+ * Returns -EBADF if @filep is NULL or is not a SEV device file, otherwise
+ * the result of sev_do_cmd().
+ */
+int sev_issue_cmd_external_user(struct file *filep, unsigned int cmd,
+				void *data, int *error)
+{
+	if (!filep)
+		return -EBADF;
+
+	/* only files opened on the SEV device carry sev_fops */
+	if (filep->f_op != &sev_fops)
+		return -EBADF;
+
+	return sev_do_cmd(cmd, data, error);
+}
+EXPORT_SYMBOL_GPL(sev_issue_cmd_external_user);
+
+/*
+ * Module-load time SEV setup: look up the PSP master device, read the
+ * firmware API version, optionally update the firmware and initialize
+ * the platform.
+ *
+ * psp_master is reset to NULL when the API version cannot be read. A
+ * failing platform init is only logged; psp_master stays set.
+ */
+void psp_pci_init(void)
+{
+	struct sp_device *sp;
+	int error, rc;
+
+	sp = sp_get_psp_master_device();
+	if (!sp)
+		return;
+
+	psp_master = sp->psp_data;
+
+	psp_timeout = psp_probe_timeout;
+
+	if (sev_get_api_version())
+		goto err;
+
+	/* re-read the version if a firmware update (API >= 0.15) succeeded */
+	if (SEV_VERSION_GREATER_OR_EQUAL(0, 15) &&
+	    sev_update_firmware(psp_master->dev) == 0)
+		sev_get_api_version();
+
+	/* Initialize the platform */
+	rc = sev_platform_init(&error);
+	if (rc) {
+		dev_err(sp->dev, "SEV: failed to INIT error %#x\n", error);
+		return;
+	}
+
+	dev_info(sp->dev, "SEV API:%d.%d build:%d\n", psp_master->api_major,
+		 psp_master->api_minor, psp_master->build);
+
+	return;
+
+err:
+	psp_master = NULL;
+}
+
+/* Module-unload counterpart of psp_pci_init(): shut the platform down. */
+void psp_pci_exit(void)
+{
+	/* nothing to shut down when no master device was set up */
+	if (psp_master)
+		sev_platform_shutdown(NULL);
+}
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
new file mode 100644
index 000000000..8b53a9674
--- /dev/null
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -0,0 +1,70 @@
+/*
+ * AMD Platform Security Processor (PSP) interface driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __PSP_DEV_H__
+#define __PSP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+#include <linux/dmaengine.h>
+#include <linux/psp-sev.h>
+#include <linux/miscdevice.h>
+
+#include "sp-dev.h"
+
+#define PSP_CMD_COMPLETE		BIT(1)
+
+#define PSP_CMDRESP_CMD_SHIFT		16
+#define PSP_CMDRESP_IOC			BIT(0)
+#define PSP_CMDRESP_RESP		BIT(31)
+#define PSP_CMDRESP_ERR_MASK		0xffff
+
+#define MAX_PSP_NAME_LEN		16
+
+/*
+ * The SEV misc device together with a reference count of the PSP devices
+ * sharing it.
+ */
+struct sev_misc_dev {
+	struct kref refcount;
+	struct miscdevice misc;
+};
+
+/* Per-device state of a Platform Security Processor. */
+struct psp_device {
+	struct list_head entry;
+
+	/* register offsets for this PSP version, relative to io_regs */
+	struct psp_vdata *vdata;
+	char name[MAX_PSP_NAME_LEN];
+
+	struct device *dev;
+	struct sp_device *sp;
+
+	void __iomem *io_regs;
+
+	/* compared against the SEV_STATE_* values */
+	int sev_state;
+	/* presumably set when the command-complete interrupt arrives; verify in psp-dev.c */
+	unsigned int sev_int_rcvd;
+	wait_queue_head_t sev_int_queue;
+	struct sev_misc_dev *sev_misc;
+	/* buffer used for PLATFORM_STATUS commands */
+	struct sev_user_data_status status_cmd_buf;
+	struct sev_data_init init_cmd_buf;
+
+	/* firmware version as reported by PLATFORM_STATUS */
+	u8 api_major;
+	u8 api_minor;
+	u8 build;
+};
+
+#endif /* __PSP_DEV_H__ */
diff --git a/drivers/crypto/ccp/sp-dev.c b/drivers/crypto/ccp/sp-dev.c
new file mode 100644
index 000000000..e0459002e
--- /dev/null
+++ b/drivers/crypto/ccp/sp-dev.c
@@ -0,0 +1,312 @@
+/*
+ * AMD Secure Processor driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/spinlock_types.h>
+#include <linux/types.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+#include "sp-dev.h"
+
+MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.1.0");
+MODULE_DESCRIPTION("AMD Secure Processor driver");
+
+/* List of SPs, SP count, read-write access lock, and access functions
+ *
+ * Lock structure: get sp_unit_lock for reading whenever we need to
+ * examine the SP list.
+ */
+static DEFINE_RWLOCK(sp_unit_lock);
+static LIST_HEAD(sp_units);
+
+/* Ever-increasing value to produce unique unit numbers */
+static atomic_t sp_ordinal;
+
+/* Append @sp to the global list of secure processors. */
+static void sp_add_device(struct sp_device *sp)
+{
+	unsigned long irqflags;
+
+	write_lock_irqsave(&sp_unit_lock, irqflags);
+	list_add_tail(&sp->entry, &sp_units);
+	write_unlock_irqrestore(&sp_unit_lock, irqflags);
+}
+
+/* Unlink @sp from the global list of secure processors. */
+static void sp_del_device(struct sp_device *sp)
+{
+	unsigned long irqflags;
+
+	write_lock_irqsave(&sp_unit_lock, irqflags);
+	list_del(&sp->entry);
+	write_unlock_irqrestore(&sp_unit_lock, irqflags);
+}
+
+/*
+ * Common interrupt handler used when the CCP and the PSP share one
+ * interrupt: forward the interrupt to every sub-device handler that is
+ * currently registered.
+ */
+static irqreturn_t sp_irq_handler(int irq, void *data)
+{
+	struct sp_device *sp_dev = data;
+
+	/* CCP first, then PSP */
+	if (sp_dev->ccp_irq_handler)
+		sp_dev->ccp_irq_handler(irq, sp_dev->ccp_irq_data);
+
+	if (sp_dev->psp_irq_handler)
+		sp_dev->psp_irq_handler(irq, sp_dev->psp_irq_data);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Register @handler for the CCP interrupt of @sp.
+ *
+ * When the CCP and PSP use the same interrupt and the device has a PSP,
+ * the handler is recorded in @sp and the common sp_irq_handler() is
+ * requested once; otherwise @handler is requested directly.
+ *
+ * Returns 0 on success or the error returned by request_irq().
+ */
+int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data)
+{
+	bool shared = (sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata;
+	int rc;
+
+	/* Each sub-device can manage its own interrupt */
+	if (!shared)
+		return request_irq(sp->ccp_irq, handler, 0, name, data);
+
+	/* Need a common routine to manage all interrupts */
+	sp->ccp_irq_data = data;
+	sp->ccp_irq_handler = handler;
+
+	if (sp->irq_registered)
+		return 0;
+
+	rc = request_irq(sp->ccp_irq, sp_irq_handler, 0, sp->name, sp);
+	if (rc)
+		return rc;
+
+	sp->irq_registered = true;
+
+	return 0;
+}
+
+/*
+ * Register @handler for the PSP interrupt of @sp.
+ *
+ * When the CCP and PSP use the same interrupt and the device has a CCP,
+ * the handler is recorded in @sp and the common sp_irq_handler() is
+ * requested once; otherwise @handler is requested directly.
+ *
+ * Returns 0 on success or the error returned by request_irq().
+ */
+int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data)
+{
+	bool shared = (sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata;
+	int rc;
+
+	/* Each sub-device can manage its own interrupt */
+	if (!shared)
+		return request_irq(sp->psp_irq, handler, 0, name, data);
+
+	/* Need a common routine to manage all interrupts */
+	sp->psp_irq_data = data;
+	sp->psp_irq_handler = handler;
+
+	if (sp->irq_registered)
+		return 0;
+
+	rc = request_irq(sp->psp_irq, sp_irq_handler, 0, sp->name, sp);
+	if (rc)
+		return rc;
+
+	sp->irq_registered = true;
+
+	return 0;
+}
+
+/*
+ * Release the CCP interrupt of @sp. In the shared-interrupt case the
+ * common interrupt is freed only when no PSP handler is registered any
+ * more; the CCP handler is then forgotten either way.
+ */
+void sp_free_ccp_irq(struct sp_device *sp, void *data)
+{
+	bool shared = (sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->psp_vdata;
+
+	if (!shared) {
+		/* Each sub-device can manage its own interrupt */
+		free_irq(sp->ccp_irq, data);
+		return;
+	}
+
+	/* Using common routine to manage all interrupts */
+	if (!sp->psp_irq_handler) {
+		/* Nothing else using it, so free it */
+		free_irq(sp->ccp_irq, sp);
+
+		sp->irq_registered = false;
+	}
+
+	sp->ccp_irq_handler = NULL;
+	sp->ccp_irq_data = NULL;
+}
+
+/*
+ * Release the PSP interrupt of @sp. In the shared-interrupt case the
+ * common interrupt is freed only when no CCP handler is registered any
+ * more; the PSP handler is then forgotten either way.
+ */
+void sp_free_psp_irq(struct sp_device *sp, void *data)
+{
+	bool shared = (sp->psp_irq == sp->ccp_irq) && sp->dev_vdata->ccp_vdata;
+
+	if (!shared) {
+		/* Each sub-device can manage its own interrupt */
+		free_irq(sp->psp_irq, data);
+		return;
+	}
+
+	/* Using common routine to manage all interrupts */
+	if (!sp->ccp_irq_handler) {
+		/* Nothing else using it, so free it */
+		free_irq(sp->psp_irq, sp);
+
+		sp->irq_registered = false;
+	}
+
+	sp->psp_irq_handler = NULL;
+	sp->psp_irq_data = NULL;
+}
+
+/**
+ * sp_alloc_struct - allocate and initialize the sp_device struct
+ *
+ * @dev: device struct of the SP
+ *
+ * The structure is device-managed memory tied to @dev. It is given the
+ * next unit ordinal and a name of the form "sp-<ordinal>".
+ *
+ * Return: the new sp_device, or NULL if the allocation failed.
+ */
+struct sp_device *sp_alloc_struct(struct device *dev)
+{
+	struct sp_device *new_sp = devm_kzalloc(dev, sizeof(*new_sp), GFP_KERNEL);
+
+	if (new_sp) {
+		new_sp->dev = dev;
+		new_sp->ord = atomic_inc_return(&sp_ordinal);
+		snprintf(new_sp->name, SP_MAX_NAME_LEN, "sp-%u", new_sp->ord);
+	}
+
+	return new_sp;
+}
+
+/*
+ * Add @sp to the global SP list and initialize the sub-devices (CCP, PSP)
+ * its version data describes.
+ *
+ * Always returns 0: the results of ccp_dev_init() and psp_dev_init() are
+ * not checked here.
+ */
+int sp_init(struct sp_device *sp)
+{
+	sp_add_device(sp);
+
+	if (sp->dev_vdata->ccp_vdata)
+		ccp_dev_init(sp);
+
+	if (sp->dev_vdata->psp_vdata)
+		psp_dev_init(sp);
+	return 0;
+}
+
+/*
+ * Counterpart of sp_init(): destroy the sub-devices described by the
+ * version data of @sp, then remove @sp from the global SP list.
+ */
+void sp_destroy(struct sp_device *sp)
+{
+	if (sp->dev_vdata->ccp_vdata)
+		ccp_dev_destroy(sp);
+
+	if (sp->dev_vdata->psp_vdata)
+		psp_dev_destroy(sp);
+
+	sp_del_device(sp);
+}
+
+#ifdef CONFIG_PM
+/*
+ * Suspend @sp. Only the CCP sub-device takes part in suspend; returns its
+ * result, or 0 when the device has no CCP.
+ */
+int sp_suspend(struct sp_device *sp, pm_message_t state)
+{
+	if (!sp->dev_vdata->ccp_vdata)
+		return 0;
+
+	return ccp_dev_suspend(sp, state);
+}
+
+/*
+ * Resume @sp. Only the CCP sub-device takes part in resume; returns its
+ * result, or 0 when the device has no CCP.
+ */
+int sp_resume(struct sp_device *sp)
+{
+	if (!sp->dev_vdata->ccp_vdata)
+		return 0;
+
+	return ccp_dev_resume(sp);
+}
+#endif
+
+/*
+ * Return the PSP master device, or NULL if there is none.
+ *
+ * The master is obtained from the get_psp_master_device() callback of the
+ * first listed SP that has PSP data and provides that callback.
+ *
+ * The SP list is only examined here, so sp_unit_lock is taken for reading
+ * as the locking rule at the top of this file asks for.
+ */
+struct sp_device *sp_get_psp_master_device(void)
+{
+	struct sp_device *i, *ret = NULL;
+	unsigned long flags;
+
+	read_lock_irqsave(&sp_unit_lock, flags);
+
+	/* an empty list simply yields no iteration */
+	list_for_each_entry(i, &sp_units, entry) {
+		if (i->psp_data && i->get_psp_master_device) {
+			ret = i->get_psp_master_device();
+			break;
+		}
+	}
+
+	read_unlock_irqrestore(&sp_unit_lock, flags);
+	return ret;
+}
+
+/*
+ * Module init: register the bus driver that matches the architecture
+ * (PCI on x86, platform on arm64). On x86 with PSP support configured the
+ * SEV platform is set up right after the PCI driver is registered.
+ *
+ * Returns -ENODEV when built for neither architecture.
+ */
+static int __init sp_mod_init(void)
+{
+#ifdef CONFIG_X86
+	int ret;
+
+	ret = sp_pci_init();
+	if (ret)
+		return ret;
+
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+	psp_pci_init();
+#endif
+
+	return 0;
+#endif
+
+#ifdef CONFIG_ARM64
+	int ret;
+
+	ret = sp_platform_init();
+	if (ret)
+		return ret;
+
+	return 0;
+#endif
+
+	return -ENODEV;
+}
+
+/*
+ * Module exit: undo sp_mod_init() in reverse order. On x86 the SEV
+ * platform is shut down before the PCI driver is unregistered.
+ */
+static void __exit sp_mod_exit(void)
+{
+#ifdef CONFIG_X86
+
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+	psp_pci_exit();
+#endif
+
+	sp_pci_exit();
+#endif
+
+#ifdef CONFIG_ARM64
+	sp_platform_exit();
+#endif
+}
+
+module_init(sp_mod_init);
+module_exit(sp_mod_exit);
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
new file mode 100644
index 000000000..14398cad1
--- /dev/null
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -0,0 +1,164 @@
+/*
+ * AMD Secure Processor driver
+ *
+ * Copyright (C) 2017 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __SP_DEV_H__
+#define __SP_DEV_H__
+
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+#include <linux/hw_random.h>
+#include <linux/bitops.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
+
+#define SP_MAX_NAME_LEN		32
+
+#define CACHE_NONE			0x00
+#define CACHE_WB_NO_ALLOC		0xb7
+
+/* Structure to hold CCP device data */
+struct ccp_device;
+/*
+ * Version-specific description of a CCP.
+ * NOTE(review): the users of these fields are outside this header;
+ * confirm their meaning against ccp-dev.c before documenting them.
+ */
+struct ccp_vdata {
+	const unsigned int version;
+	const unsigned int dma_chan_attr;
+	void (*setup)(struct ccp_device *);
+	const struct ccp_actions *perform;
+	const unsigned int offset;
+	const unsigned int rsamax;
+};
+
+/*
+ * Version-specific register layout of a PSP. Each field is a register
+ * offset that the PSP driver adds to the device's mapped I/O base.
+ */
+struct psp_vdata {
+	const unsigned int cmdresp_reg;
+	const unsigned int cmdbuff_addr_lo_reg;
+	const unsigned int cmdbuff_addr_hi_reg;
+	const unsigned int feature_reg;		/* bit 0: SEV supported */
+	const unsigned int inten_reg;		/* interrupt enable */
+	const unsigned int intsts_reg;		/* interrupt status */
+};
+
+/*
+ * Structure to hold SP device data: the BAR whose mapping is used for
+ * register access and the version data of the sub-devices. A NULL
+ * ccp_vdata/psp_vdata means the device has no such sub-device.
+ */
+struct sp_dev_vdata {
+	const unsigned int bar;
+
+	const struct ccp_vdata *ccp_vdata;
+	const struct psp_vdata *psp_vdata;
+};
+
+/* State of one secure processor and its CCP/PSP sub-devices. */
+struct sp_device {
+	/* link in the global list of secure processors */
+	struct list_head entry;
+
+	struct device *dev;
+
+	struct sp_dev_vdata *dev_vdata;
+	/* unique unit number; name is "sp-<ord>" */
+	unsigned int ord;
+	char name[SP_MAX_NAME_LEN];
+
+	/* Bus specific device information */
+	void *dev_specific;
+
+	/* I/O area used for device communication. */
+	void __iomem *io_map;
+
+	/* DMA caching attribute support */
+	unsigned int axcache;
+
+	/* get and set master device */
+	struct sp_device*(*get_psp_master_device)(void);
+	void (*set_psp_master_device)(struct sp_device *);
+
+	/* true while the common (shared) interrupt handler is requested */
+	bool irq_registered;
+	bool use_tasklet;
+
+	/* CCP interrupt number and the handler/cookie used when shared */
+	unsigned int ccp_irq;
+	irq_handler_t ccp_irq_handler;
+	void *ccp_irq_data;
+
+	/* PSP interrupt number and the handler/cookie used when shared */
+	unsigned int psp_irq;
+	irq_handler_t psp_irq_handler;
+	void *psp_irq_data;
+
+	/* private data of the CCP and PSP sub-device drivers */
+	void *ccp_data;
+	void *psp_data;
+};
+
+int sp_pci_init(void);
+void sp_pci_exit(void);
+
+int sp_platform_init(void);
+void sp_platform_exit(void);
+
+struct sp_device *sp_alloc_struct(struct device *dev);
+
+int sp_init(struct sp_device *sp);
+void sp_destroy(struct sp_device *sp);
+struct sp_device *sp_get_master(void);
+
+int sp_suspend(struct sp_device *sp, pm_message_t state);
+int sp_resume(struct sp_device *sp);
+int sp_request_ccp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data);
+void sp_free_ccp_irq(struct sp_device *sp, void *data);
+int sp_request_psp_irq(struct sp_device *sp, irq_handler_t handler,
+		       const char *name, void *data);
+void sp_free_psp_irq(struct sp_device *sp, void *data);
+struct sp_device *sp_get_psp_master_device(void);
+
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+
+int ccp_dev_init(struct sp_device *sp);
+void ccp_dev_destroy(struct sp_device *sp);
+
+int ccp_dev_suspend(struct sp_device *sp, pm_message_t state);
+int ccp_dev_resume(struct sp_device *sp);
+
+#else	/* !CONFIG_CRYPTO_DEV_SP_CCP */
+
+/* No-op stubs used when CCP support is not configured. */
+static inline int ccp_dev_init(struct sp_device *sp)
+{
+	return 0;
+}
+static inline void ccp_dev_destroy(struct sp_device *sp) { }
+
+static inline int ccp_dev_suspend(struct sp_device *sp, pm_message_t state)
+{
+	return 0;
+}
+static inline int ccp_dev_resume(struct sp_device *sp)
+{
+	return 0;
+}
+#endif	/* CONFIG_CRYPTO_DEV_SP_CCP */
+
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+
+int psp_dev_init(struct sp_device *sp);
+void psp_pci_init(void);
+void psp_dev_destroy(struct sp_device *sp);
+void psp_pci_exit(void);
+
+#else /* !CONFIG_CRYPTO_DEV_SP_PSP */
+
+/* No-op stubs used when PSP support is not configured. */
+static inline int psp_dev_init(struct sp_device *sp) { return 0; }
+static inline void psp_pci_init(void) { }
+static inline void psp_dev_destroy(struct sp_device *sp) { }
+static inline void psp_pci_exit(void) { }
+
+#endif /* CONFIG_CRYPTO_DEV_SP_PSP */
+
+#endif
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
new file mode 100644
index 000000000..9b2742212
--- /dev/null
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -0,0 +1,354 @@
+/*
+ * AMD Secure Processor device driver
+ *
+ * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/pci_ids.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+
+#include "ccp-dev.h"
+#include "psp-dev.h"
+
+#define MSIX_VECTORS			2
+
+/* PCI specific per-device data, stored in sp_device.dev_specific. */
+struct sp_pci {
+	/* number of MSI-X vectors enabled; 0 when MSI-X is not in use */
+	int msix_count;
+	struct msix_entry msix_entry[MSIX_VECTORS];
+};
+static struct sp_device *sp_dev_master;
+
+/*
+ * Enable between one and MSIX_VECTORS MSI-X vectors. The PSP always uses
+ * the first vector; the CCP uses the second one when it was granted and
+ * shares the first one otherwise.
+ *
+ * Returns 0 on success or the error from pci_enable_msix_range().
+ */
+static int sp_get_msix_irqs(struct sp_device *sp)
+{
+	struct sp_pci *pci_data = sp->dev_specific;
+	struct pci_dev *pdev = to_pci_dev(sp->dev);
+	int idx, nvec;
+
+	for (idx = 0; idx < ARRAY_SIZE(pci_data->msix_entry); idx++)
+		pci_data->msix_entry[idx].entry = idx;
+
+	/* idx now equals the number of table entries, i.e. the maximum */
+	nvec = pci_enable_msix_range(pdev, pci_data->msix_entry, 1, idx);
+	if (nvec < 0)
+		return nvec;
+
+	pci_data->msix_count = nvec;
+	sp->use_tasklet = true;
+
+	sp->psp_irq = pci_data->msix_entry[0].vector;
+	if (pci_data->msix_count > 1)
+		sp->ccp_irq = pci_data->msix_entry[1].vector;
+	else
+		sp->ccp_irq = pci_data->msix_entry[0].vector;
+
+	return 0;
+}
+
+/*
+ * Enable MSI; the CCP and the PSP then share the single device interrupt.
+ *
+ * Returns 0 on success or the error from pci_enable_msi().
+ */
+static int sp_get_msi_irq(struct sp_device *sp)
+{
+	struct pci_dev *pdev = to_pci_dev(sp->dev);
+	int rc = pci_enable_msi(pdev);
+
+	if (rc)
+		return rc;
+
+	sp->ccp_irq = pdev->irq;
+	sp->psp_irq = pdev->irq;
+
+	return 0;
+}
+
+/*
+ * Set up the device interrupts, preferring MSI-X and falling back to MSI.
+ *
+ * Returns 0 on success or the error of the MSI attempt.
+ */
+static int sp_get_irqs(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	int rc;
+
+	rc = sp_get_msix_irqs(sp);
+	if (rc) {
+		/* Couldn't get MSI-X vectors, try MSI */
+		dev_notice(dev, "could not enable MSI-X (%d), trying MSI\n", rc);
+		rc = sp_get_msi_irq(sp);
+
+		/* Couldn't get MSI interrupt */
+		if (rc)
+			dev_notice(dev, "could not enable MSI (%d)\n", rc);
+	}
+
+	return rc;
+}
+
+/*
+ * Disable whichever interrupt mode sp_get_irqs() enabled and clear the
+ * recorded interrupt numbers.
+ */
+static void sp_free_irqs(struct sp_device *sp)
+{
+	struct sp_pci *pci_data = sp->dev_specific;
+	struct pci_dev *pdev = to_pci_dev(sp->dev);
+
+	if (pci_data->msix_count) {
+		pci_disable_msix(pdev);
+	} else if (sp->psp_irq) {
+		/* no MSI-X vectors but an interrupt number: MSI was used */
+		pci_disable_msi(pdev);
+	}
+
+	sp->psp_irq = 0;
+	sp->ccp_irq = 0;
+}
+
+/*
+ * Decide whether @sp should replace the current master device.
+ *
+ * The device with the lowest PCI address wins: bus number is compared
+ * first, then slot, then function. A less significant field is only
+ * looked at when all more significant fields are equal, so a device on a
+ * higher bus can never win because of a lower slot or function number.
+ *
+ * Must only be called while sp_dev_master is set.
+ */
+static bool sp_pci_is_master(struct sp_device *sp)
+{
+	struct device *dev_cur, *dev_new;
+	struct pci_dev *pdev_cur, *pdev_new;
+
+	dev_new = sp->dev;
+	dev_cur = sp_dev_master->dev;
+
+	pdev_new = to_pci_dev(dev_new);
+	pdev_cur = to_pci_dev(dev_cur);
+
+	if (pdev_new->bus->number != pdev_cur->bus->number)
+		return pdev_new->bus->number < pdev_cur->bus->number;
+
+	if (PCI_SLOT(pdev_new->devfn) != PCI_SLOT(pdev_cur->devfn))
+		return PCI_SLOT(pdev_new->devfn) < PCI_SLOT(pdev_cur->devfn);
+
+	if (PCI_FUNC(pdev_new->devfn) != PCI_FUNC(pdev_cur->devfn))
+		return PCI_FUNC(pdev_new->devfn) < PCI_FUNC(pdev_cur->devfn);
+
+	return false;
+}
+
+/*
+ * Offer @sp as the PSP master: it is taken when there is no master yet or
+ * when sp_pci_is_master() prefers it over the current one.
+ */
+static void psp_set_master(struct sp_device *sp)
+{
+	/* the comparison is only made once a master exists */
+	if (!sp_dev_master || sp_pci_is_master(sp))
+		sp_dev_master = sp;
+}
+
+/* Return the current PSP master device, or NULL if none has been set. */
+static struct sp_device *psp_get_master(void)
+{
+	return sp_dev_master;
+}
+
+/*
+ * PCI probe: allocate the SP structures, enable the device, map its
+ * memory BARs, set up interrupts and the DMA mask, then initialize the
+ * sub-devices through sp_init().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct sp_device *sp;
+	struct sp_pci *sp_pci;
+	struct device *dev = &pdev->dev;
+	void __iomem * const *iomap_table;
+	int bar_mask;
+	int ret;
+
+	ret = -ENOMEM;
+	sp = sp_alloc_struct(dev);
+	if (!sp)
+		goto e_err;
+
+	sp_pci = devm_kzalloc(dev, sizeof(*sp_pci), GFP_KERNEL);
+	if (!sp_pci)
+		goto e_err;
+
+	sp->dev_specific = sp_pci;
+	/* version data comes from the matching sp_pci_table entry */
+	sp->dev_vdata = (struct sp_dev_vdata *)id->driver_data;
+	if (!sp->dev_vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "pcim_enable_device failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	/* map every memory BAR, then pick the one named by the version data */
+	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+	ret = pcim_iomap_regions(pdev, bar_mask, "ccp");
+	if (ret) {
+		dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	iomap_table = pcim_iomap_table(pdev);
+	if (!iomap_table) {
+		dev_err(dev, "pcim_iomap_table failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
+
+	sp->io_map = iomap_table[sp->dev_vdata->bar];
+	if (!sp->io_map) {
+		dev_err(dev, "ioremap failed\n");
+		ret = -ENOMEM;
+		goto e_err;
+	}
+
+	ret = sp_get_irqs(sp);
+	if (ret)
+		goto e_err;
+
+	pci_set_master(pdev);
+	sp->set_psp_master_device = psp_set_master;
+	sp->get_psp_master_device = psp_get_master;
+
+	/* prefer a 48-bit DMA mask, fall back to 32-bit */
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
+				ret);
+			goto free_irqs;
+		}
+	}
+
+	dev_set_drvdata(dev, sp);
+
+	ret = sp_init(sp);
+	if (ret)
+		goto free_irqs;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+free_irqs:
+	sp_free_irqs(sp);
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
+/*
+ * PCI remove: destroy the sub-devices and release the interrupts. Does
+ * nothing when no sp_device is attached to the PCI device.
+ */
+static void sp_pci_remove(struct pci_dev *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct sp_device *sp = dev_get_drvdata(dev);
+
+	if (sp) {
+		sp_destroy(sp);
+		sp_free_irqs(sp);
+
+		dev_notice(dev, "disabled\n");
+	}
+}
+
+#ifdef CONFIG_PM
+/* PCI suspend hook: forward to sp_suspend() for the attached sp_device. */
+static int sp_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct sp_device *sp = dev_get_drvdata(&pdev->dev);
+
+	return sp_suspend(sp, state);
+}
+
+/* PCI resume hook: forward to sp_resume() for the attached sp_device. */
+static int sp_pci_resume(struct pci_dev *pdev)
+{
+	struct sp_device *sp = dev_get_drvdata(&pdev->dev);
+
+	return sp_resume(sp);
+}
+#endif
+
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+/* Register offsets of the first PSP register layout. */
+static const struct psp_vdata pspv1 = {
+	.cmdresp_reg		= 0x10580,
+	.cmdbuff_addr_lo_reg	= 0x105e0,
+	.cmdbuff_addr_hi_reg	= 0x105e4,
+	.feature_reg		= 0x105fc,
+	.inten_reg		= 0x10610,
+	.intsts_reg		= 0x10614,
+};
+
+/* Register offsets of the second PSP register layout. */
+static const struct psp_vdata pspv2 = {
+	.cmdresp_reg		= 0x10980,
+	.cmdbuff_addr_lo_reg	= 0x109e0,
+	.cmdbuff_addr_hi_reg	= 0x109e4,
+	.feature_reg		= 0x109fc,
+	.inten_reg		= 0x10690,
+	.intsts_reg		= 0x10694,
+};
+#endif
+
+/*
+ * Version data of the supported devices; sp_pci_table refers to these
+ * entries by index. A sub-device entry is only present when support for
+ * it is configured.
+ */
+static const struct sp_dev_vdata dev_vdata[] = {
+	{	/* 0 */
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3,
+#endif
+	},
+	{	/* 1 */
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5a,
+#endif
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+		.psp_vdata = &pspv1,
+#endif
+	},
+	{	/* 2 */
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5b,
+#endif
+	},
+	{	/* 3 */
+		.bar = 2,
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv5a,
+#endif
+#ifdef CONFIG_CRYPTO_DEV_SP_PSP
+		.psp_vdata = &pspv2,
+#endif
+	},
+};
+/* Supported AMD PCI device IDs; driver_data points into dev_vdata[]. */
+static const struct pci_device_id sp_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x1537), (kernel_ulong_t)&dev_vdata[0] },
+	{ PCI_VDEVICE(AMD, 0x1456), (kernel_ulong_t)&dev_vdata[1] },
+	{ PCI_VDEVICE(AMD, 0x1468), (kernel_ulong_t)&dev_vdata[2] },
+	{ PCI_VDEVICE(AMD, 0x1486), (kernel_ulong_t)&dev_vdata[3] },
+	/* Last entry must be zero */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, sp_pci_table);
+
+/* PCI driver glue; the power management hooks exist only with CONFIG_PM. */
+static struct pci_driver sp_pci_driver = {
+	.name = "ccp",
+	.id_table = sp_pci_table,
+	.probe = sp_pci_probe,
+	.remove = sp_pci_remove,
+#ifdef CONFIG_PM
+	.suspend = sp_pci_suspend,
+	.resume = sp_pci_resume,
+#endif
+};
+
+/* Register the SP PCI driver; returns the pci_register_driver() result. */
+int sp_pci_init(void)
+{
+	return pci_register_driver(&sp_pci_driver);
+}
+
+/* Unregister the SP PCI driver. */
+void sp_pci_exit(void)
+{
+	pci_unregister_driver(&sp_pci_driver);
+}
diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c
new file mode 100644
index 000000000..71734f254
--- /dev/null
+++ b/drivers/crypto/ccp/sp-platform.c
@@ -0,0 +1,256 @@
+/*
+ * AMD Secure Processor device driver
+ *
+ * Copyright (C) 2014,2016 Advanced Micro Devices, Inc.
+ *
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/ioport.h>
+#include <linux/dma-mapping.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+#include <linux/ccp.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/acpi.h>
+
+#include "ccp-dev.h"
+
+struct sp_platform {	/* platform-bus specific state, stored in sp->dev_specific */
+	int coherent;	/* nonzero when device_get_dma_attr() reported DEV_DMA_COHERENT */
+	unsigned int irq_count;	/* number of IORESOURCE_IRQ resources found by sp_get_irqs() */
+};
+
+static const struct acpi_device_id sp_acpi_match[];	/* forward declaration; used by sp_get_acpi_version() */
+static const struct of_device_id sp_of_match[];	/* forward declaration; used by sp_get_of_version() */
+
+/*
+ * sp_get_of_version - fetch the version data through the OF match table
+ * @pdev: platform device being probed
+ *
+ * Matches the device's of_node against sp_of_match and returns the ->data
+ * pointer of the matching entry.  Returns NULL when nothing matches, when
+ * the matching entry carries no data, or when CONFIG_OF is not defined.
+ */
+static struct sp_dev_vdata *sp_get_of_version(struct platform_device *pdev)
+{
+#ifdef CONFIG_OF
+	const struct of_device_id *of_id =
+		of_match_node(sp_of_match, pdev->dev.of_node);
+
+	if (!of_id || !of_id->data)
+		return NULL;
+
+	return (struct sp_dev_vdata *)of_id->data;
+#else
+	return NULL;
+#endif
+}
+
+/*
+ * sp_get_acpi_version - fetch the version data through the ACPI match table
+ * @pdev: platform device being probed
+ *
+ * Matches the device against sp_acpi_match and returns the ->driver_data
+ * value of the matching entry, cast to a struct sp_dev_vdata pointer.
+ * Returns NULL when nothing matches, when the matching entry has no driver
+ * data, or when CONFIG_ACPI is not defined.
+ */
+static struct sp_dev_vdata *sp_get_acpi_version(struct platform_device *pdev)
+{
+#ifdef CONFIG_ACPI
+	const struct acpi_device_id *acpi_id =
+		acpi_match_device(sp_acpi_match, &pdev->dev);
+
+	if (!acpi_id || !acpi_id->driver_data)
+		return NULL;
+
+	return (struct sp_dev_vdata *)acpi_id->driver_data;
+#else
+	return NULL;
+#endif
+}
+
+/*
+ * sp_get_irqs - look up the interrupt number(s) of a platform SP device
+ * @sp: device whose ->dev belongs to a platform_device and whose
+ *      ->dev_specific points at a struct sp_platform
+ *
+ * Counts the IORESOURCE_IRQ resources of the platform device and stores the
+ * total in sp_platform->irq_count.  IRQ index 0 is always stored in
+ * sp->psp_irq.  With exactly one IRQ resource the same number is also stored
+ * in sp->ccp_irq; otherwise IRQ index 1 is looked up for sp->ccp_irq.
+ *
+ * Returns 0 on success or the negative value from platform_get_irq().
+ */
+static int sp_get_irqs(struct sp_device *sp)
+{
+	struct device *dev = sp->dev;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct sp_platform *sp_platform = sp->dev_specific;
+	unsigned int nr_irqs = 0;
+	unsigned int idx;
+	int irq;
+
+	for (idx = 0; idx < pdev->num_resources; idx++)
+		if (resource_type(&pdev->resource[idx]) == IORESOURCE_IRQ)
+			nr_irqs++;
+
+	sp_platform->irq_count = nr_irqs;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_notice(dev, "unable to get IRQ (%d)\n", irq);
+		return irq;
+	}
+	sp->psp_irq = irq;
+
+	/* A lone interrupt line serves as both psp_irq and ccp_irq. */
+	if (nr_irqs == 1) {
+		sp->ccp_irq = irq;
+		return 0;
+	}
+
+	irq = platform_get_irq(pdev, 1);
+	if (irq < 0) {
+		dev_notice(dev, "unable to get IRQ (%d)\n", irq);
+		return irq;
+	}
+	sp->ccp_irq = irq;
+
+	return 0;
+}
+
+/*
+ * sp_platform_probe - bind the SP driver to a platform device
+ * @pdev: platform device to set up
+ *
+ * Steps, in order:
+ *   - allocate the sp_device (sp_alloc_struct()) and the sp_platform data
+ *     (devm_kzalloc());
+ *   - resolve the version data from the OF match table when the device has
+ *     an of_node, otherwise from the ACPI match table;
+ *   - map MEM resource 0 with devm_ioremap_resource();
+ *   - derive sp->axcache from the device's DMA coherency attribute;
+ *   - set a 48-bit streaming and coherent DMA mask;
+ *   - look up the IRQs, publish @sp as driver data and call sp_init().
+ *
+ * Every failure funnels through e_err, which only logs and returns; no
+ * explicit unwinding is performed in this function.
+ *
+ * Returns 0 on success or a negative errno: -ENOMEM on allocation failure,
+ * -ENODEV when no version data is found or DMA is not supported, otherwise
+ * the error reported by the failing helper.
+ */
+static int sp_platform_probe(struct platform_device *pdev)
+{
+	struct sp_device *sp;
+	struct sp_platform *sp_platform;
+	struct device *dev = &pdev->dev;
+	enum dev_dma_attr attr;
+	struct resource *ior;
+	int ret;
+
+	/* Both allocations below share the same error code. */
+	ret = -ENOMEM;
+	sp = sp_alloc_struct(dev);
+	if (!sp)
+		goto e_err;
+
+	sp_platform = devm_kzalloc(dev, sizeof(*sp_platform), GFP_KERNEL);
+	if (!sp_platform)
+		goto e_err;
+
+	sp->dev_specific = sp_platform;
+	sp->dev_vdata = pdev->dev.of_node ? sp_get_of_version(pdev)
+					 : sp_get_acpi_version(pdev);
+	if (!sp->dev_vdata) {
+		ret = -ENODEV;
+		dev_err(dev, "missing driver data\n");
+		goto e_err;
+	}
+
+	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	sp->io_map = devm_ioremap_resource(dev, ior);
+	if (IS_ERR(sp->io_map)) {
+		ret = PTR_ERR(sp->io_map);
+		goto e_err;
+	}
+
+	attr = device_get_dma_attr(dev);
+	if (attr == DEV_DMA_NOT_SUPPORTED) {
+		/*
+		 * ret would otherwise still hold the -ENOMEM set for the
+		 * allocations above; report the actual cause instead.
+		 */
+		ret = -ENODEV;
+		dev_err(dev, "DMA is not supported\n");
+		goto e_err;
+	}
+
+	sp_platform->coherent = (attr == DEV_DMA_COHERENT);
+	if (sp_platform->coherent)
+		sp->axcache = CACHE_WB_NO_ALLOC;
+	else
+		sp->axcache = CACHE_NONE;
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+	if (ret) {
+		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
+		goto e_err;
+	}
+
+	ret = sp_get_irqs(sp);
+	if (ret)
+		goto e_err;
+
+	/* remove/suspend/resume fetch @sp back through dev_get_drvdata(). */
+	dev_set_drvdata(dev, sp);
+
+	ret = sp_init(sp);
+	if (ret)
+		goto e_err;
+
+	dev_notice(dev, "enabled\n");
+
+	return 0;
+
+e_err:
+	dev_notice(dev, "initialization failed\n");
+	return ret;
+}
+
+/*
+ * sp_platform_remove - platform driver .remove callback
+ * @pdev: platform device being unbound
+ *
+ * Hands the sp_device stored as driver data to sp_destroy() and logs that
+ * the device is disabled.  Always returns 0.
+ */
+static int sp_platform_remove(struct platform_device *pdev)
+{
+	struct sp_device *sp = dev_get_drvdata(&pdev->dev);
+
+	sp_destroy(sp);
+	dev_notice(&pdev->dev, "disabled\n");
+
+	return 0;
+}
+
+#ifdef CONFIG_PM
+/*
+ * sp_platform_suspend - platform driver .suspend callback
+ * @pdev: platform device being suspended
+ * @state: PM message, passed through unchanged
+ *
+ * Forwards to sp_suspend() with the sp_device stored as driver data and
+ * returns its result.
+ */
+static int sp_platform_suspend(struct platform_device *pdev,
+				pm_message_t state)
+{
+	struct sp_device *sp = dev_get_drvdata(&pdev->dev);
+
+	return sp_suspend(sp, state);
+}
+
+/*
+ * sp_platform_resume - platform driver .resume callback
+ * @pdev: platform device being resumed
+ *
+ * Forwards to sp_resume() with the sp_device stored as driver data and
+ * returns its result.
+ */
+static int sp_platform_resume(struct platform_device *pdev)
+{
+	struct sp_device *sp = dev_get_drvdata(&pdev->dev);
+
+	return sp_resume(sp);
+}
+#endif
+
+static const struct sp_dev_vdata dev_vdata[] = {	/* version data for platform (non-PCI) devices */
+	{	/* 0: referenced by both sp_acpi_match and sp_of_match */
+		.bar = 0,	/* NOTE(review): meaning for platform devices not visible here -- confirm */
+#ifdef CONFIG_CRYPTO_DEV_SP_CCP
+		.ccp_vdata = &ccpv3_platform,
+#endif
+	},
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id sp_acpi_match[] = {	/* ACPI IDs handled by this driver */
+	{ "AMDI0C00", (kernel_ulong_t)&dev_vdata[0] },	/* driver data is read back by sp_get_acpi_version() */
+	{ },	/* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(acpi, sp_acpi_match);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id sp_of_match[] = {	/* device-tree compatibles handled by this driver */
+	{ .compatible = "amd,ccp-seattle-v1a",
+	  .data = (const void *)&dev_vdata[0] },	/* read back by sp_get_of_version() */
+	{ },	/* empty terminating entry */
+};
+MODULE_DEVICE_TABLE(of, sp_of_match);
+#endif
+
+static struct platform_driver sp_platform_driver = {	/* registered/unregistered by sp_platform_init()/sp_platform_exit() */
+	.driver = {
+		.name = "ccp",
+#ifdef CONFIG_ACPI	/* sp_acpi_match is only defined with CONFIG_ACPI */
+		.acpi_match_table = sp_acpi_match,
+#endif
+#ifdef CONFIG_OF	/* sp_of_match is only defined with CONFIG_OF */
+		.of_match_table = sp_of_match,
+#endif
+	},
+	.probe = sp_platform_probe,
+	.remove = sp_platform_remove,
+#ifdef CONFIG_PM	/* the suspend/resume handlers are only compiled with CONFIG_PM */
+	.suspend = sp_platform_suspend,
+	.resume = sp_platform_resume,
+#endif
+};
+
+/*
+ * sp_platform_init - register the SP platform driver
+ *
+ * Returns the value returned by platform_driver_register().
+ */
+int sp_platform_init(void)
+{
+	int rc;
+
+	rc = platform_driver_register(&sp_platform_driver);
+
+	return rc;
+}
+
+/*
+ * sp_platform_exit - unregister the SP platform driver
+ *
+ * Counterpart of sp_platform_init().
+ */
+void sp_platform_exit(void)
+{
+	struct platform_driver *drv = &sp_platform_driver;
+
+	platform_driver_unregister(drv);
+}