12 files changed, 2944 insertions, 0 deletions
diff --git a/plat/nvidia/tegra/drivers/bpmp/bpmp.c b/plat/nvidia/tegra/drivers/bpmp/bpmp.c
new file mode 100644
index 0000000..d7db604
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/bpmp/bpmp.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <bpmp.h>
+#include <common/debug.h>
+#include <drivers/delay_timer.h>
+#include <errno.h>
+#include <lib/mmio.h>
+#include <plat/common/platform.h>
+#include <stdbool.h>
+#include <string.h>
+#include <tegra_def.h>
+
+#define BPMP_TIMEOUT	500 /* 500ms: polled in 1ms steps via mdelay(1) */
+
+static uint32_t channel_base[NR_CHANNELS]; /* set by tegra_bpmp_init() */
+static uint32_t bpmp_init_state = BPMP_INIT_PENDING; /* BPMP_* state */
+
+static uint32_t channel_field(unsigned int ch) /* status bits of 'ch' */
+{
+	return mmio_read_32(TEGRA_RES_SEMA_BASE + STA_OFFSET) & CH_MASK(ch);
+}
+
+static bool master_free(unsigned int ch)
+{
+	return channel_field(ch) == MA_FREE(ch); /* field reads MA_FREE */
+}
+
+static bool master_acked(unsigned int ch)
+{
+	return channel_field(ch) == MA_ACKD(ch); /* field reads MA_ACKD */
+}
+
+static void signal_slave(unsigned int ch) /* write ch's mask to CLR */
+{
+	mmio_write_32(TEGRA_RES_SEMA_BASE + CLR_OFFSET, CH_MASK(ch));
+}
+
+static void free_master(unsigned int ch)
+{
+	mmio_write_32(TEGRA_RES_SEMA_BASE + CLR_OFFSET,
+		      MA_ACKD(ch) ^ MA_FREE(ch)); /* bits where ACKD != FREE */
+}
+
+/*
+ * Send 'mrq' with 'ob_data' to the BPMP on the calling core's channel and
+ * copy 'ib_sz' bytes of the reply into 'ib_data'.
+ * Returns the 'code' field read back after the ack, -EINVAL if the
+ * interface is not initialized or -ETIMEDOUT.
+ * Should be called with local irqs disabled.
+ */
+int32_t tegra_bpmp_send_receive_atomic(int mrq, const void *ob_data, int ob_sz,
+		void *ib_data, int ib_sz)
+{
+	unsigned int ch = (unsigned int)plat_my_core_pos();
+	int32_t ret, timeout;
+	mb_data_t *p;
+
+	if (bpmp_init_state != BPMP_INIT_COMPLETE) {
+		return -EINVAL;
+	}
+
+	/* the core position selects the channel; stay inside channel_base[] */
+	assert(ch < NR_CHANNELS);
+	p = (mb_data_t *)(uintptr_t)channel_base[ch];
+
+	/* loop until BPMP is free */
+	for (timeout = 0; timeout < BPMP_TIMEOUT; timeout++) {
+		if (master_free(ch)) {
+			break;
+		}
+		mdelay(1);
+	}
+
+	if (timeout == BPMP_TIMEOUT) {
+		ERROR("Timed out waiting for bpmp's response\n");
+		return -ETIMEDOUT;
+	}
+
+	/* generate the command struct */
+	p->code = mrq;
+	p->flags = DO_ACK;
+	(void)memcpy((void *)p->data, ob_data, (size_t)ob_sz);
+
+	/* signal command ready to the BPMP */
+	signal_slave(ch);
+	mmio_write_32(TEGRA_PRI_ICTLR_BASE + CPU_IEP_FIR_SET,
+		      (1U << INT_SHR_SEM_OUTBOX_FULL));
+
+	/* loop until the command is executed */
+	for (timeout = 0; timeout < BPMP_TIMEOUT; timeout++) {
+		if (master_acked(ch)) {
+			break;
+		}
+		mdelay(1);
+	}
+
+	if (timeout == BPMP_TIMEOUT) {
+		ERROR("Timed out waiting for bpmp's response\n");
+		return -ETIMEDOUT;
+	}
+
+	/* fetch the response and its code, then free this channel */
+	(void)memcpy(ib_data, (const void *)p->data, (size_t)ib_sz);
+	ret = p->code;
+	free_master(ch);
+
+	return ret;
+}
+
+int tegra_bpmp_init(void)
+{
+	uint32_t val, base, timeout = BPMP_TIMEOUT;
+	unsigned int ch;
+
+	/* the handshake runs once; any later call is a successful no-op */
+	if (bpmp_init_state != BPMP_INIT_PENDING) {
+		return 0;
+	}
+
+	/* check if the bpmp processor is alive. */
+	for (;;) {
+		val = mmio_read_32(TEGRA_RES_SEMA_BASE + STA_OFFSET);
+		if (val == SIGN_OF_LIFE) {
+			break;
+		}
+
+		mdelay(1);
+		timeout--;
+		if (timeout == 0U) {
+			break;
+		}
+	}
+
+	if (val != SIGN_OF_LIFE) {
+		ERROR("BPMP not powered on\n");
+
+		/* bpmp is not present in the system */
+		bpmp_init_state = BPMP_NOT_PRESENT;
+
+		/* communication timed out */
+		return -ETIMEDOUT;
+	}
+
+	/* check if clock for the atomics block is enabled */
+	val = mmio_read_32(TEGRA_CAR_RESET_BASE + TEGRA_CLK_ENB_V);
+	if ((val & CAR_ENABLE_ATOMICS) == 0) {
+		ERROR("Clock to the atomics block is disabled\n");
+	}
+
+	/* check if the atomics block is out of reset */
+	val = mmio_read_32(TEGRA_CAR_RESET_BASE + TEGRA_RST_DEV_CLR_V);
+	if ((val & CAR_ENABLE_ATOMICS) == CAR_ENABLE_ATOMICS) {
+		ERROR("Reset to the atomics block is asserted\n");
+	}
+
+	/* base address to get the result from Atomics */
+	base = TEGRA_ATOMICS_BASE + RESULT0_REG_OFFSET;
+
+	/* channel area is setup by BPMP before signaling handshake */
+	for (ch = 0; ch < NR_CHANNELS; ch++) {
+		/* issue command to get the channel base address */
+		mmio_write_32(base, (ch << TRIGGER_ID_SHIFT) | ATOMIC_CMD_GET);
+
+		/* get the base address for the channel */
+		channel_base[ch] = mmio_read_32(base);
+
+		/* increment result register offset */
+		base += 4U;
+	}
+
+	/* mark state as "initialized" */
+	bpmp_init_state = BPMP_INIT_COMPLETE;
+
+	/* the channel values have to be visible across all cpus */
+	flush_dcache_range((uint64_t)channel_base, sizeof(channel_base));
+	flush_dcache_range((uint64_t)&bpmp_init_state,
+			   sizeof(bpmp_init_state));
+
+	INFO("%s: done\n", __func__);
+
+	return 0;
+}
+
+void tegra_bpmp_suspend(void)
+{
+	/* freeze the interface, but only if it is currently usable */
+	if (bpmp_init_state != BPMP_INIT_COMPLETE) {
+		return;
+	}
+	bpmp_init_state = BPMP_SUSPEND_ENTRY;
+	flush_dcache_range((uint64_t)&bpmp_init_state, sizeof(bpmp_init_state));
+}
+
+void tegra_bpmp_resume(void)
+{
+	uint32_t val, timeout = 0;
+
+	if (bpmp_init_state == BPMP_SUSPEND_ENTRY) {
+		/* only a frozen (BPMP_SUSPEND_ENTRY) interface is resumed */
+		/* check if the bpmp processor is alive. */
+		do {
+			/* 'timeout' counts the 1ms waits spent so far */
+			val = mmio_read_32(TEGRA_RES_SEMA_BASE + STA_OFFSET);
+			if (val != SIGN_OF_LIFE) {
+				mdelay(1);
+				timeout++;
+			}
+
+		} while ((val != SIGN_OF_LIFE) && (timeout < BPMP_TIMEOUT));
+
+		if (val == SIGN_OF_LIFE) {
+			/* 'timeout' is unsigned, so it is printed with %u */
+			INFO("%s: BPMP took %u ms to resume\n", __func__, timeout);
+
+			/* mark state as "initialized" */
+			bpmp_init_state = BPMP_INIT_COMPLETE;
+
+			/* state has to be visible across all cpus */
+			flush_dcache_range((uint64_t)&bpmp_init_state,
+					   sizeof(bpmp_init_state));
+		} else {
+			ERROR("BPMP not powered on\n");
+		}
+	}
+}
diff --git a/plat/nvidia/tegra/drivers/bpmp_ipc/intf.c b/plat/nvidia/tegra/drivers/bpmp_ipc/intf.c
new file mode 100644
index 0000000..2e90d25
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/bpmp_ipc/intf.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <bpmp_ipc.h>
+#include <common/debug.h>
+#include <drivers/delay_timer.h>
+#include <errno.h>
+#include <lib/mmio.h>
+#include <lib/utils_def.h>
+#include <stdbool.h>
+#include <string.h>
+#include <tegra_def.h>
+
+#include "intf.h"
+#include "ivc.h"
+
+/**
+ * Most recently acquired IVC frames of the CCPLEX <-> BPMP channel
+ */
+struct ccplex_bpmp_channel_data {
+	/* Incoming frame, recorded by tegra_bpmp_slave_acked() */
+	struct frame_data *ib;
+
+	/* Outgoing frame, recorded by tegra_bpmp_get_next_out_frame() */
+	struct frame_data *ob;
+};
+
+static struct ccplex_bpmp_channel_data s_channel; /* current ib/ob frames */
+static struct ivc ivc_ccplex_bpmp_channel; /* set up in tegra_bpmp_ipc_init() */
+
+/*
+ * Helpers for 32-bit access to the HSP doorbell register at offset 'reg'
+ */
+static inline uint32_t hsp_db_read(uint32_t reg)
+{
+	return mmio_read_32((uint32_t)(TEGRA_HSP_DBELL_BASE + reg));
+}
+
+static inline void hsp_db_write(uint32_t reg, uint32_t val)
+{
+	mmio_write_32((uint32_t)(TEGRA_HSP_DBELL_BASE + reg), val);
+}
+
+/*******************************************************************************
+ *      IVC wrappers for CCPLEX <-> BPMP communication.
+ ******************************************************************************/
+
+static void tegra_bpmp_ring_bpmp_doorbell(void);
+
+/*
+ * Fetch the next writable frame and cache it in s_channel.ob (NULL if none).
+ */
+static struct frame_data *tegra_bpmp_get_next_out_frame(void)
+{
+	struct frame_data *next;
+
+	next = (struct frame_data *)
+		tegra_ivc_write_get_next_frame(&ivc_ccplex_bpmp_channel);
+	if (next != NULL) {
+		s_channel.ob = next;
+	} else {
+		ERROR("%s: Error in getting next frame, exiting\n", __func__);
+	}
+
+	return next;
+}
+
+static void tegra_bpmp_signal_slave(void)
+{
+	(void)tegra_ivc_write_advance(&ivc_ccplex_bpmp_channel); /* publish */
+	tegra_bpmp_ring_bpmp_doorbell(); /* then ring the BPMP doorbell */
+}
+
+static int32_t tegra_bpmp_free_master(void) /* advance past the read frame */
+{
+	return tegra_ivc_read_advance(&ivc_ccplex_bpmp_channel);
+}
+
+static bool tegra_bpmp_slave_acked(void)
+{
+	const struct ivc *ch = &ivc_ccplex_bpmp_channel;
+	struct frame_data *rsp;
+
+	/* a readable frame counts as the ack; remember it in s_channel.ib */
+	rsp = (struct frame_data *)tegra_ivc_read_get_next_frame(ch);
+	if (rsp == NULL) {
+		return false;
+	}
+
+	s_channel.ib = rsp;
+	return true;
+}
+
+static struct frame_data *tegra_bpmp_get_cur_in_frame(void)
+{
+	return s_channel.ib; /* recorded by tegra_bpmp_slave_acked() */
+}
+
+/*
+ * Sets the BPMP master bit in the doorbell 1 enable register
+ */
+static void tegra_bpmp_enable_ccplex_doorbell(void)
+{
+	uint32_t enable;
+
+	/* read-modify-write: every bit that is already set is kept */
+	enable = hsp_db_read(HSP_DBELL_1_ENABLE) | HSP_MASTER_BPMP_BIT;
+	hsp_db_write(HSP_DBELL_1_ENABLE, enable);
+}
+
+/*
+ * CCPlex rings the BPMP doorbell
+ */
+static void tegra_bpmp_ring_bpmp_doorbell(void)
+{
+	/*
+	 * Any write to this register has the same effect: the master ID
+	 * of the write transaction is used to set the corresponding
+	 * flag.
+	 */
+	hsp_db_write(HSP_DBELL_3_TRIGGER, HSP_MASTER_CCPLEX_BIT);
+}
+
+/*
+ * Returns true if CCPLex can ring BPMP doorbell, otherwise false.
+ * This also signals that BPMP is up and ready.
+ */
+static bool tegra_bpmp_can_ccplex_ring_doorbell(void)
+{
+	uint32_t enabled_masters;
+
+	/* the CCPLEX bit of the doorbell 3 enable register decides */
+	enabled_masters = hsp_db_read(HSP_DBELL_3_ENABLE);
+
+	return ((enabled_masters & HSP_MASTER_CCPLEX_BIT) != 0U);
+}
+
+static int32_t tegra_bpmp_wait_for_slave_ack(void)
+{
+	uint32_t timeout = TIMEOUT_RESPONSE_FROM_BPMP_US;
+	bool acked = tegra_bpmp_slave_acked();
+
+	for (; !acked && (timeout != 0U); acked = tegra_bpmp_slave_acked()) {
+		udelay(1);
+		timeout--;
+	}
+	return (acked ? 0 : -ETIMEDOUT); /* an ack on the last poll counts */
+}
+
+/*
+ * Notification hook passed to tegra_ivc_init(): rings the BPMP doorbell
+ */
+static void tegra_bpmp_ivc_notify(const struct ivc *ivc)
+{
+	(void)(ivc);
+
+	tegra_bpmp_ring_bpmp_doorbell();
+}
+
+/*
+ * Atomic send/receive API, which means it waits until slave acks.
+ * Copies size_out bytes from p_out into the request frame and, when p_in
+ * is given, size_in bytes of the response frame's data into p_in.
+ */
+static int32_t tegra_bpmp_ipc_send_req_atomic(uint32_t mrq, void *p_out,
+			uint32_t size_out, void *p_in, uint32_t size_in)
+{
+	struct frame_data *frame = tegra_bpmp_get_next_out_frame();
+	const struct frame_data *f_in = NULL;
+	int32_t ret = 0;
+
+	if ((p_out == NULL) || (size_out > IVC_DATA_SZ_BYTES) ||
+	    (frame == NULL)) {
+		ERROR("%s: invalid parameters, exiting\n", __func__);
+		return -EINVAL;
+	}
+
+	/* prepare the command frame */
+	frame->mrq = mrq;
+	frame->flags = FLAG_DO_ACK;
+	(void)memcpy(frame->data, p_out, (size_t)size_out);
+
+	/* signal the slave */
+	tegra_bpmp_signal_slave();
+
+	/* wait for slave to ack */
+	ret = tegra_bpmp_wait_for_slave_ack();
+	if (ret < 0) {
+		ERROR("%s: wait for slave failed (%d)\n", __func__, ret);
+		return ret;
+	}
+
+	/* retrieve the response frame */
+	if ((size_in <= IVC_DATA_SZ_BYTES) && (p_in != NULL)) {
+		/* the reply lives in the incoming frame, not in the request */
+		f_in = tegra_bpmp_get_cur_in_frame();
+		if (f_in == NULL) {
+			ERROR("Failed to get next input frame!\n");
+		} else {
+			(void)memcpy(p_in, f_in->data, (size_t)size_in);
+		}
+	}
+
+	ret = tegra_bpmp_free_master();
+	if (ret < 0) {
+		ERROR("%s: free master failed (%d)\n", __func__, ret);
+	}
+
+	return ret;
+}
+
+/*
+ * Initializes the BPMP<--->CCPlex communication path (0 or negative errno).
+ */
+int32_t tegra_bpmp_ipc_init(void)
+{
+	size_t msg_size;
+	uint32_t frame_size, timeout;
+	int32_t error = 0;
+
+	/* allow bpmp to ring CCPLEX's doorbell */
+	tegra_bpmp_enable_ccplex_doorbell();
+
+	/* wait for BPMP to actually ring the doorbell */
+	timeout = TIMEOUT_RESPONSE_FROM_BPMP_US;
+	while ((timeout != 0U) && !tegra_bpmp_can_ccplex_ring_doorbell()) {
+		udelay(1); /* bpmp turn-around time */
+		timeout--;
+	}
+	/* judge by the doorbell itself: it may turn ready in the last delay */
+	if (!tegra_bpmp_can_ccplex_ring_doorbell()) {
+		ERROR("%s: BPMP firmware is not ready\n", __func__);
+		return -ENOTSUP;
+	}
+
+	INFO("%s: BPMP handshake completed\n", __func__);
+
+	msg_size = tegra_ivc_align(IVC_CMD_SZ_BYTES);
+	frame_size = (uint32_t)tegra_ivc_total_queue_size(msg_size);
+	if (frame_size > TEGRA_BPMP_IPC_CH_MAP_SIZE) {
+		ERROR("%s: carveout size is not sufficient\n", __func__);
+		return -EINVAL;
+	}
+
+	error = tegra_ivc_init(&ivc_ccplex_bpmp_channel,
+				(uint32_t)TEGRA_BPMP_IPC_RX_PHYS_BASE,
+				(uint32_t)TEGRA_BPMP_IPC_TX_PHYS_BASE,
+				1U, frame_size, tegra_bpmp_ivc_notify);
+	if (error != 0) {
+
+		ERROR("%s: IVC init failed (%d)\n", __func__, error);
+
+	} else {
+
+		/* reset channel */
+		tegra_ivc_channel_reset(&ivc_ccplex_bpmp_channel);
+		/* NOTE(review): the wait below has no timeout */
+		/* wait for notification from BPMP */
+		while (tegra_ivc_channel_notified(&ivc_ccplex_bpmp_channel) != 0) {
+			/*
+			 * Interrupt BPMP with doorbell each time after
+			 * tegra_ivc_channel_notified() returns non zero
+			 * value.
+			 */
+			tegra_bpmp_ring_bpmp_doorbell();
+		}
+
+		INFO("%s: All communication channels initialized\n", __func__);
+	}
+
+	return error;
+}
+
+/* Handler to reset a hardware module: sends MRQ_RESET/CMD_RESET_MODULE */
+int32_t tegra_bpmp_ipc_reset_module(uint32_t rst_id)
+{
+	int32_t ret;
+	struct mrq_reset_request req = {
+		.cmd = (uint32_t)CMD_RESET_MODULE,
+		.reset_id = rst_id
+	};
+
+	/* only GPCDMA/XUSB_PADCTL resets are supported */
+	assert((rst_id == TEGRA_RESET_ID_XUSB_PADCTL) ||
+	       (rst_id == TEGRA_RESET_ID_GPCDMA));
+
+	ret = tegra_bpmp_ipc_send_req_atomic(MRQ_RESET, &req,
+			(uint32_t)sizeof(req), NULL, 0);
+	if (ret != 0) {
+		ERROR("%s: failed for module %u with error %d\n", __func__,
+		      rst_id, ret);
+	}
+
+	return ret;
+}
+
+int tegra_bpmp_ipc_enable_clock(uint32_t clk_id)
+{
+	int ret;
+	struct mrq_clk_request req;
+
+	/* only SE clocks are supported; any other id is rejected up front */
+	if (clk_id != TEGRA_CLK_SE) {
+		return -ENOTSUP;
+	}
+
+	/* prepare the MRQ_CLK command */
+	req.cmd_and_id = make_mrq_clk_cmd(CMD_CLK_ENABLE, clk_id);
+
+	ret = tegra_bpmp_ipc_send_req_atomic(MRQ_CLK, &req, (uint32_t)sizeof(req),
+			NULL, 0);
+	if (ret != 0) {
+		ERROR("%s: failed for module %u with error %d\n", __func__,
+		      clk_id, ret);
+	}
+
+	return ret;
+}
+
+int tegra_bpmp_ipc_disable_clock(uint32_t clk_id)
+{
+	int ret;
+	struct mrq_clk_request req;
+
+	/* only SE clocks are supported; any other id is rejected up front */
+	if (clk_id != TEGRA_CLK_SE) {
+		return -ENOTSUP;
+	}
+
+	/* prepare the MRQ_CLK command */
+	req.cmd_and_id = make_mrq_clk_cmd(CMD_CLK_DISABLE, clk_id);
+
+	ret = tegra_bpmp_ipc_send_req_atomic(MRQ_CLK, &req, (uint32_t)sizeof(req),
+			NULL, 0);
+	if (ret != 0) {
+		ERROR("%s: failed for module %u with error %d\n", __func__,
+		      clk_id, ret);
+	}
+
+	return ret;
+}
diff --git a/plat/nvidia/tegra/drivers/bpmp_ipc/intf.h b/plat/nvidia/tegra/drivers/bpmp_ipc/intf.h
new file mode 100644
index 0000000..d85b906
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/bpmp_ipc/intf.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef BPMP_INTF_H
+#define BPMP_INTF_H
+
+/**
+ * Flags used in IPC req
+ */
+#define FLAG_DO_ACK			(U(1) << 0)
+#define FLAG_RING_DOORBELL		(U(1) << 1)
+
+/* Bit 1 is designated for CCPlex in secure world */
+#define HSP_MASTER_CCPLEX_BIT	(U(1) << 1)
+/* Bit 19 is designated for BPMP in non-secure world */
+#define HSP_MASTER_BPMP_BIT		(U(1) << 19)
+/* Timeout to receive response from BPMP is 1 sec */
+#define TIMEOUT_RESPONSE_FROM_BPMP_US	U(1000000) /* in microseconds */
+
+/**
+ * IVC protocol defines and command/response frame
+ */
+
+/**
+ * IVC specific defines
+ */
+#define IVC_CMD_SZ_BYTES		U(128)
+#define IVC_DATA_SZ_BYTES		U(120)
+
+/**
+ * Holds frame data for an IPC request: mrq and flags followed by the payload
+ */
+struct frame_data {
+	/* Identification as to what kind of data is being transmitted (MRQ_*) */
+	uint32_t mrq;
+
+	/* Flags for slave as to how to respond back (FLAG_*) */
+	uint32_t flags;
+
+	/* Actual data being sent, at most IVC_DATA_SZ_BYTES bytes */
+	uint8_t data[IVC_DATA_SZ_BYTES];
+};
+
+/**
+ * Commands sent to the BPMP firmware
+ */
+
+/**
+ * MRQ command codes
+ */
+#define MRQ_RESET			U(20)
+#define MRQ_CLK				U(22)
+
+/**
+ * Reset sub-commands
+ */
+#define CMD_RESET_ASSERT		U(1)
+#define CMD_RESET_DEASSERT		U(2)
+#define CMD_RESET_MODULE		U(3)
+
+/**
+ * Used by the sender of an #MRQ_RESET message to request BPMP to
+ * assert or deassert a given reset line.
+ */
+struct __attribute__((packed)) mrq_reset_request {
+	/* reset action to perform (one of the CMD_RESET_* sub-commands) */
+	uint32_t cmd;
+	/* id of the reset to be affected */
+	uint32_t reset_id;
+};
+
+/**
+ * MRQ_CLK sub-commands
+ * (carried in bits[31..24] of mrq_clk_request.cmd_and_id)
+ */
+enum {
+	CMD_CLK_GET_RATE = U(1),
+	CMD_CLK_SET_RATE = U(2),
+	CMD_CLK_ROUND_RATE = U(3),
+	CMD_CLK_GET_PARENT = U(4),
+	CMD_CLK_SET_PARENT = U(5),
+	CMD_CLK_IS_ENABLED = U(6),
+	CMD_CLK_ENABLE = U(7),
+	CMD_CLK_DISABLE = U(8),
+	CMD_CLK_GET_ALL_INFO = U(14),
+	CMD_CLK_GET_MAX_CLK_ID = U(15),
+	CMD_CLK_MAX,
+};
+
+/**
+ * Used by the sender of an #MRQ_CLK message to control clocks. The
+ * clk_request is split into several sub-commands. Some sub-commands
+ * require no additional data. Others have a sub-command specific
+ * payload
+ *
+ * |sub-command                 |payload                |
+ * |----------------------------|-----------------------|
+ * |CMD_CLK_GET_RATE            |-                      |
+ * |CMD_CLK_SET_RATE            |clk_set_rate           |
+ * |CMD_CLK_ROUND_RATE          |clk_round_rate         |
+ * |CMD_CLK_GET_PARENT          |-                      |
+ * |CMD_CLK_SET_PARENT          |clk_set_parent         |
+ * |CMD_CLK_IS_ENABLED          |-                      |
+ * |CMD_CLK_ENABLE              |-                      |
+ * |CMD_CLK_DISABLE             |-                      |
+ * |CMD_CLK_GET_ALL_INFO        |-                      |
+ * |CMD_CLK_GET_MAX_CLK_ID      |-                      |
+ *
+ */
+struct mrq_clk_request {
+	/**
+	 * sub-command and clock id packed into one 32-bit word, as built by
+	 * make_mrq_clk_cmd(): bits[31..24] hold the sub-cmd (CMD_CLK_*) and
+	 * bits[23..0] hold the clock id.
+	 */
+	uint32_t cmd_and_id;
+};
+
+/**
+ * Pack an MRQ_CLK word: 'cmd' goes to bits[31..24], 'id' to bits[23..0]
+ */
+#define make_mrq_clk_cmd(cmd, id)	(((uint32_t)(cmd) << 24) | ((uint32_t)(id) & 0xFFFFFFU))
+
+#endif /* BPMP_INTF_H */
diff --git a/plat/nvidia/tegra/drivers/bpmp_ipc/ivc.c b/plat/nvidia/tegra/drivers/bpmp_ipc/ivc.c
new file mode 100644
index 0000000..d964fc0
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/bpmp_ipc/ivc.c
@@ -0,0 +1,654 @@
+/*
+ * Copyright (c) 2017-2020, NVIDIA CORPORATION. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch_helpers.h>
+#include <assert.h>
+#include <common/debug.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+
+#include "ivc.h"
+
+/*
+ * IVC channel reset protocol.
+ *
+ * Each end uses its tx_channel.state to indicate its synchronization state.
+ */
+enum {
+	/*
+	 * This value is zero for backwards compatibility with services that
+	 * assume channels to be initially zeroed. Such channels are in an
+	 * initially valid state, but cannot be asynchronously reset, and must
+	 * maintain a valid state at all times.
+	 *
+	 * The transmitting end can enter the established state from the sync or
+	 * ack state when it observes the receiving endpoint in the ack or
+	 * established state, indicating that it has cleared the counters in our
+	 * rx_channel.
+	 */
+	ivc_state_established = U(0),
+
+	/*
+	 * If an endpoint is observed in the sync state, the remote endpoint is
+	 * allowed to clear the counters it owns asynchronously with respect to
+	 * the current endpoint. Therefore, the current endpoint is no longer
+	 * allowed to communicate.
+	 */
+	ivc_state_sync = U(1),
+
+	/*
+	 * When the transmitting end observes the receiving end in the sync
+	 * state, it can clear the w_count and r_count and transition to the ack
+	 * state. If the remote endpoint observes us in the ack state, it can
+	 * return to the established state once it has cleared its counters.
+	 */
+	ivc_state_ack = U(2)
+};
+
+/*
+ * This structure is divided into two cache-aligned parts, the first is only
+ * written through the tx_channel pointer, while the second is only written
+ * through the rx_channel pointer. This delineates ownership of the cache lines,
+ * which is critical to performance and necessary in non-cache coherent
+ * implementations.
+ */
+struct ivc_channel_header {
+	struct {
+		/* fields owned by the transmitting end (IVC_CHHDR_TX_FIELDS words) */
+		uint32_t w_count;
+		uint32_t state;
+		uint32_t w_rsvd[IVC_CHHDR_TX_FIELDS - 2];
+	};
+	struct {
+		/* fields owned by the receiving end (IVC_CHHDR_RX_FIELDS words) */
+		uint32_t r_count;
+		uint32_t r_rsvd[IVC_CHHDR_RX_FIELDS - 1];
+	};
+};
+
+static inline bool ivc_channel_empty(const struct ivc *ivc,
+		volatile const struct ivc_channel_header *ch)
+{
+	/*
+	 * Several checks with security implications are made on the same
+	 * values, so each counter is sampled from shared memory exactly
+	 * once (w_count first, then r_count) and only the samples are
+	 * compared below.
+	 */
+	uint32_t written = ch->w_count;
+	uint32_t consumed = ch->r_count;
+	uint32_t pending = written - consumed;
+
+	/* equal counters: every written frame has been consumed */
+	if (pending == 0U) {
+		return true;
+	}
+
+	/*
+	 * Over-full check, to prevent denial of service attacks where a
+	 * server could be fooled into believing that an extremely large
+	 * number of frames is ready (receivers are not expected to check
+	 * for full or over-full conditions themselves).
+	 *
+	 * The channel is not really empty then, but the state is invalid
+	 * and may be caused by a malicious peer, so "empty" is the safer
+	 * answer: the channel just appears to have gone silent.
+	 */
+	return (pending > ivc->nframes);
+}
+
+static inline bool ivc_channel_full(const struct ivc *ivc,
+		volatile const struct ivc_channel_header *ch)
+{
+	/* sample w_count, then r_count, exactly once each */
+	uint32_t written = ch->w_count;
+	uint32_t consumed = ch->r_count;
+	uint32_t in_flight = written - consumed;
+
+	/*
+	 * Counters showing more than nframes in flight are invalid; such
+	 * an over-capacity queue is reported as full as well.
+	 */
+	return (in_flight >= ivc->nframes);
+}
+
+static inline uint32_t ivc_channel_avail_count(const struct ivc *ivc,
+		volatile const struct ivc_channel_header *ch)
+{
+	uint32_t written, consumed;
+
+	(void)ivc;
+
+	/*
+	 * No over-full filtering is done here: this helper is not meant
+	 * for paths where an over-full queue could be abused for denial
+	 * of service (see the comment in ivc_channel_empty()).
+	 */
+	written = ch->w_count;
+	consumed = ch->r_count;
+	return (written - consumed);
+}
+
+static inline void ivc_advance_tx(struct ivc *ivc)
+{
+	uint32_t last = ivc->nframes - (uint32_t)1U;
+
+	/* count one more written frame in the tx channel header */
+	ivc->tx_channel->w_count++;
+
+	/* step the local write index, wrapping after the last frame */
+	ivc->w_pos = (ivc->w_pos == last) ? 0U : (ivc->w_pos + 1U);
+}
+
+static inline void ivc_advance_rx(struct ivc *ivc)
+{
+	uint32_t last = ivc->nframes - (uint32_t)1U;
+
+	/* count one more consumed frame in the rx channel header */
+	ivc->rx_channel->r_count++;
+
+	/* step the local read index, wrapping after the last frame */
+	ivc->r_pos = (ivc->r_pos == last) ? 0U : (ivc->r_pos + 1U);
+}
+
+static inline int32_t ivc_check_read(const struct ivc *ivc)
+{
+	/*
+	 * tx_channel->state is set locally, so it is not synchronized with
+	 * state from the remote peer. The remote peer cannot reset its
+	 * transmit counters until we've acknowledged its synchronization
+	 * request, so no additional synchronization is required because an
+	 * asynchronous transition of rx_channel->state to ivc_state_ack is not
+	 * allowed.
+	 */
+	if (ivc->tx_channel->state != ivc_state_established) {
+		return -ECONNRESET;
+	}
+
+	/*
+	 * Avoid unnecessary invalidations when performing repeated accesses
+	 * to an IVC channel by checking the old queue pointers first.
+	 * Synchronization is only necessary when these pointers indicate
+	 * empty or full.
+	 */
+	if (!ivc_channel_empty(ivc, ivc->rx_channel)) {
+		return 0;
+	}
+	/* looked empty: sample the counters once more before giving up */
+	return ivc_channel_empty(ivc, ivc->rx_channel) ? -ENOMEM : 0;
+}
+
+static inline int32_t ivc_check_write(const struct ivc *ivc)
+{
+	if (ivc->tx_channel->state != ivc_state_established) {
+		return -ECONNRESET;
+	}
+	/* fast path: the tx queue is not full */
+	if (!ivc_channel_full(ivc, ivc->tx_channel)) {
+		return 0;
+	}
+	/* looked full: sample the counters once more before giving up */
+	return ivc_channel_full(ivc, ivc->tx_channel) ? -ENOMEM : 0;
+}
+
+bool tegra_ivc_can_read(const struct ivc *ivc) /* ivc_check_read() passed */
+{
+	return ivc_check_read(ivc) == 0;
+}
+
+bool tegra_ivc_can_write(const struct ivc *ivc) /* ivc_check_write() passed */
+{
+	return ivc_check_write(ivc) == 0;
+}
+
+bool tegra_ivc_tx_empty(const struct ivc *ivc) /* no unread tx frames */
+{
+	return ivc_channel_empty(ivc, ivc->tx_channel);
+}
+
+static inline uintptr_t calc_frame_offset(uint32_t frame_index,
+	uint32_t frame_size, uint32_t frame_offset)
+{
+	uintptr_t frame_start = (uintptr_t)frame_index * (uintptr_t)frame_size;
+	return frame_start + (uintptr_t)frame_offset; /* bytes from frame 0 */
+}
+
+static void *ivc_frame_pointer(const struct ivc *ivc,
+				volatile const struct ivc_channel_header *ch,
+				uint32_t frame)
+{
+	uintptr_t frames = (uintptr_t)(ch + 1); /* frames follow the header */
+	assert(frame < ivc->nframes);
+	return (void *)(frames + calc_frame_offset(frame, ivc->frame_size, 0));
+}
+
+int32_t tegra_ivc_read(struct ivc *ivc, void *buf, size_t max_read)
+{
+	const void *src;
+	int32_t result;
+
+	/* same argument check as tegra_ivc_write(): 'ivc' is used below */
+	if ((buf == NULL) || (ivc == NULL)) {
+		return -EINVAL;
+	}
+
+	/* a single read never spans more than one frame */
+	if (max_read > ivc->frame_size) {
+		return -E2BIG;
+	}
+
+	result = ivc_check_read(ivc);
+	if (result != 0) {
+		return result;
+	}
+
+	/*
+	 * Order observation of w_count potentially indicating new data
+	 * before data read.
+	 */
+	dmbish();
+
+	src = ivc_frame_pointer(ivc, ivc->rx_channel, ivc->r_pos);
+
+	(void)memcpy(buf, src, max_read);
+
+	ivc_advance_rx(ivc);
+
+	/* Ensure our write to r_count occurs before our read from w_count. */
+	dmbish();
+
+	/*
+	 * Notify only upon transition from full to non-full.
+	 * The available count can only asynchronously increase, so the
+	 * worst possible side-effect will be a spurious notification.
+	 */
+	if (ivc_channel_avail_count(ivc, ivc->rx_channel) == (ivc->nframes - (uint32_t)1U)) {
+		ivc->notify(ivc);
+	}
+
+	return (int32_t)max_read;
+}
+
+/* directly peek at the next frame rx'ed; NULL if ivc_check_read() fails */
+void *tegra_ivc_read_get_next_frame(const struct ivc *ivc)
+{
+	if (ivc_check_read(ivc) != 0) {
+		return NULL;
+	}
+
+	/*
+	 * Order observation of w_count potentially indicating new data
+	 * before the frame data is read.
+	 */
+	dmbld();
+
+	return ivc_frame_pointer(ivc, ivc->rx_channel, ivc->r_pos);
+}
+
+int32_t tegra_ivc_read_advance(struct ivc *ivc)
+{
+	/*
+	 * No read barriers or synchronization here: the caller is expected to
+	 * have already observed the channel non-empty. This check is just to
+	 * catch programming errors.
+	 */
+	int32_t result = ivc_check_read(ivc);
+	if (result != 0) {
+		return result;
+	}
+
+	ivc_advance_rx(ivc);
+
+	/*
+	 * Ensure our write to r_count occurs before our read from w_count.
+	 */
+	dmbish();
+
+	/*
+	 * Notify only upon transition from full to non-full.
+	 * The available count can only asynchronously increase, so the
+	 * worst possible side-effect will be a spurious notification.
+	 */
+	if (ivc_channel_avail_count(ivc, ivc->rx_channel) == (ivc->nframes - (uint32_t)1U)) {
+		ivc->notify(ivc);
+	}
+
+	return 0;
+}
+
+int32_t tegra_ivc_write(struct ivc *ivc, const void *buf, size_t size)
+{
+	void *p;
+	int32_t result;
+
+	if ((buf == NULL) || (ivc == NULL)) {
+		return -EINVAL;
+	}
+
+	if (size > ivc->frame_size) {
+		return -E2BIG;
+	}
+
+	result = ivc_check_write(ivc);
+	if (result != 0) {
+		return result;
+	}
+
+	p = ivc_frame_pointer(ivc, ivc->tx_channel, ivc->w_pos);
+
+	(void)memset(p, 0, ivc->frame_size);
+	(void)memcpy(p, buf, size);
+
+	/*
+	 * Ensure that updated data is visible before the w_count counter
+	 * indicates that it is ready.
+	 */
+	dmbst();
+
+	ivc_advance_tx(ivc);
+
+	/*
+	 * Ensure our write to w_count occurs before our read from r_count.
+	 */
+	dmbish();
+
+	/*
+	 * Notify only upon transition from empty to non-empty.
+	 * The available count can only asynchronously decrease, so the
+	 * worst possible side-effect will be a spurious notification.
+	 */
+	if (ivc_channel_avail_count(ivc, ivc->tx_channel) == 1U) {
+		ivc->notify(ivc);
+	}
+
+	return (int32_t)size;
+}
+
+/* directly poke at the next frame to be tx'ed; NULL if it cannot be written */
+void *tegra_ivc_write_get_next_frame(const struct ivc *ivc)
+{
+	if (ivc_check_write(ivc) != 0) {
+		return NULL;
+	}
+
+	return ivc_frame_pointer(ivc, ivc->tx_channel, ivc->w_pos);
+}
+
+/* advance the tx buffer; returns 0 or the error from ivc_check_write() */
+int32_t tegra_ivc_write_advance(struct ivc *ivc)
+{
+	int32_t result = ivc_check_write(ivc);
+
+	if (result != 0) {
+		return result;
+	}
+
+	/*
+	 * Order any possible stores to the frame before update of w_count.
+	 */
+	dmbst();
+
+	ivc_advance_tx(ivc);
+
+	/*
+	 * Ensure our write to w_count occurs before our read from r_count.
+	 */
+	dmbish();
+
+	/*
+	 * Notify only upon transition from empty to non-empty.
+	 * The available count can only asynchronously decrease, so the
+	 * worst possible side-effect will be a spurious notification.
+	 */
+	if (ivc_channel_avail_count(ivc, ivc->tx_channel) == (uint32_t)1U) {
+		ivc->notify(ivc);
+	}
+
+	return 0;
+}
+
+void tegra_ivc_channel_reset(const struct ivc *ivc)
+{
+	ivc->tx_channel->state = ivc_state_sync; /* enter the SYNC state */
+	ivc->notify(ivc); /* let the remote end observe it */
+}
+
+/*
+ * ===============================================================
+ *  IVC State Transition Table - see tegra_ivc_channel_notified()
+ * ===============================================================
+ *
+ *	local	remote	action
+ *	-----	------	-----------------------------------
+ *	SYNC	EST	<none>
+ *	SYNC	ACK	reset counters; move to EST; notify
+ *	SYNC	SYNC	reset counters; move to ACK; notify
+ *	ACK	EST	move to EST; notify
+ *	ACK	ACK	move to EST; notify
+ *	ACK	SYNC	reset counters; move to ACK; notify
+ *	EST	EST	<none>
+ *	EST	ACK	<none>
+ *	EST	SYNC	reset counters; move to ACK; notify
+ *
+ * ===============================================================
+ */
+int32_t tegra_ivc_channel_notified(struct ivc *ivc)
+{
+	uint32_t peer_state;
+
+	/* Copy the peer's state (rx_channel->state) out of shared memory. */
+	peer_state = ivc->rx_channel->state;
+
+	if (peer_state == (uint32_t)ivc_state_sync) {
+		/*
+		 * Order observation of ivc_state_sync before stores clearing
+		 * tx_channel.
+		 */
+		dmbld();
+
+		/*
+		 * Reset the counters we own. The remote end is in the SYNC
+		 * state and won't make progress until we change our state,
+		 * so the counters are not in use at this time.
+		 */
+		ivc->tx_channel->w_count = 0U;
+		ivc->rx_channel->r_count = 0U;
+
+		ivc->w_pos = 0U;
+		ivc->r_pos = 0U;
+
+		/*
+		 * Ensure that counters appear cleared before new state can be
+		 * observed.
+		 */
+		dmbst();
+
+		/*
+		 * Move to ACK state. We have just cleared our counters, so it
+		 * is now safe for the remote end to start using these values.
+		 */
+		ivc->tx_channel->state = ivc_state_ack;
+
+		/*
+		 * Notify remote end to observe state transition.
+		 */
+		ivc->notify(ivc);
+
+	} else if ((ivc->tx_channel->state == (uint32_t)ivc_state_sync) &&
+			(peer_state == (uint32_t)ivc_state_ack)) {
+		/*
+		 * Order observation of the peer's ivc_state_ack before stores
+		 * clearing tx_channel.
+		 */
+		dmbld();
+
+		/*
+		 * Reset the counters we own. The remote end is in the ACK
+		 * state and won't make progress until we change our state,
+		 * so the counters are not in use at this time.
+		 */
+		ivc->tx_channel->w_count = 0U;
+		ivc->rx_channel->r_count = 0U;
+
+		ivc->w_pos = 0U;
+		ivc->r_pos = 0U;
+
+		/*
+		 * Ensure that counters appear cleared before new state can be
+		 * observed.
+		 */
+		dmbst();
+
+		/*
+		 * Move to ESTABLISHED state. We know that the remote end has
+		 * already cleared its counters, so it is safe to start
+		 * writing/reading on this channel.
+		 */
+		ivc->tx_channel->state = ivc_state_established;
+
+		/*
+		 * Notify remote end to observe state transition.
+		 */
+		ivc->notify(ivc);
+
+	} else if (ivc->tx_channel->state == (uint32_t)ivc_state_ack) {
+		/*
+		 * At this point, we have observed the peer to be in either
+		 * the ACK or ESTABLISHED state. Next, order observation of
+		 * peer state before storing to tx_channel.
+		 */
+		dmbld();
+
+		/*
+		 * Move to ESTABLISHED state. We know that we have previously
+		 * cleared our counters, and we know that the remote end has
+		 * cleared its counters, so it is safe to start writing/reading
+		 * on this channel.
+		 */
+		ivc->tx_channel->state = ivc_state_established;
+
+		/*
+		 * Notify remote end to observe state transition.
+		 */
+		ivc->notify(ivc);
+
+	} else {
+		/*
+		 * There is no need to handle any further action. Either the
+		 * channel is already fully established, or we are waiting for
+		 * the remote end to catch up with our current state. Refer
+		 * to the diagram in "IVC State Transition Table" above.
+		 */
+	}
+
+	return ((ivc->tx_channel->state == (uint32_t)ivc_state_established) ? 0 : -EAGAIN);
+}
+
+size_t tegra_ivc_align(size_t size) /* round up to a multiple of IVC_ALIGN */
+{
+	return (size + ((size_t)IVC_ALIGN - 1U)) & ~((size_t)IVC_ALIGN - 1U);
+}
+
+size_t tegra_ivc_total_queue_size(size_t queue_size)
+{
+	if ((queue_size & (IVC_ALIGN - 1U)) != 0U) {
+		ERROR("queue_size (%d) must be %u-byte aligned\n",
+				(int32_t)queue_size, IVC_ALIGN);
+		return 0; /* 0 reports a misaligned queue_size */
+	}
+	return queue_size + sizeof(struct ivc_channel_header); /* plus header */
+}
+
+static int32_t check_ivc_params(uintptr_t queue_base1, uintptr_t queue_base2,
+		uint32_t nframes, uint32_t frame_size)
+{
+	assert((offsetof(struct ivc_channel_header, w_count)
+				& (IVC_ALIGN - 1U)) == 0U);
+	assert((offsetof(struct ivc_channel_header, r_count)
+				& (IVC_ALIGN - 1U)) == 0U);
+	assert((sizeof(struct ivc_channel_header) & (IVC_ALIGN - 1U)) == 0U);
+
+	if (((uint64_t)nframes * (uint64_t)frame_size) >= 0x100000000ULL) {
+		ERROR("nframes * frame_size overflows\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * The headers must at least be aligned enough for counters
+	 * to be accessed atomically.
+	 */
+	if ((queue_base1 & (IVC_ALIGN - 1U)) != 0U) {
+		ERROR("ivc channel start not aligned: %lx\n", queue_base1);
+		return -EINVAL;
+	}
+	if ((queue_base2 & (IVC_ALIGN - 1U)) != 0U) {
+		ERROR("ivc channel start not aligned: %lx\n", queue_base2);
+		return -EINVAL;
+	}
+
+	if ((frame_size & (IVC_ALIGN - 1U)) != 0U) {
+		ERROR("frame size not adequately aligned: %u\n",
+				frame_size);
+		return -EINVAL;
+	}
+	/* NOTE(review): the span checked is nframes * frame_size, no header */
+	if (queue_base1 < queue_base2) {
+		if ((queue_base1 + ((uint64_t)frame_size * nframes)) > queue_base2) {
+			ERROR("queue regions overlap: %lx + %x, %x\n",
+					queue_base1, frame_size,
+					frame_size * nframes);
+			return -EINVAL;
+		}
+	} else {
+		if ((queue_base2 + ((uint64_t)frame_size * nframes)) > queue_base1) {
+			ERROR("queue regions overlap: %lx + %x, %x\n",
+					queue_base2, frame_size,
+					frame_size * nframes);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+int32_t tegra_ivc_init(struct ivc *ivc, uintptr_t rx_base, uintptr_t tx_base,
+		uint32_t nframes, uint32_t frame_size,
+		ivc_notify_function notify)
+{
+	int32_t status;
+
+	/* a channel needs both a descriptor and a notify hook */
+	if ((ivc == NULL) || (notify == NULL)) {
+		return -EINVAL;
+	}
+
+	/* alignment, overflow and overlap checks on the two queues */
+	status = check_ivc_params(rx_base, tx_base, nframes, frame_size);
+	if (status != 0) {
+		return status;
+	}
+
+	/* sizes returned by the communication functions are int32_t */
+	if (frame_size > (1u << 31)) {
+		return -E2BIG;
+	}
+
+	ivc->nframes = nframes;
+	ivc->frame_size = frame_size;
+	ivc->notify = notify;
+	ivc->tx_channel = (struct ivc_channel_header *)tx_base;
+	ivc->rx_channel = (struct ivc_channel_header *)rx_base;
+
+	/* both ring indices start at frame 0 */
+	ivc->r_pos = 0U;
+	ivc->w_pos = 0U;
+
+	INFO("%s: done\n", __func__);
+
+	return 0;
+}
diff --git a/plat/nvidia/tegra/drivers/bpmp_ipc/ivc.h b/plat/nvidia/tegra/drivers/bpmp_ipc/ivc.h
new file mode 100644
index 0000000..1b31821
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/bpmp_ipc/ivc.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017-2020, NVIDIA Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#ifndef BPMP_IVC_H
+#define BPMP_IVC_H
+
+#include <lib/utils_def.h>
+#include <stdint.h>
+#include <stddef.h>
+
+#define IVC_ALIGN		U(64)
+#define IVC_CHHDR_TX_FIELDS	U(16)
+#define IVC_CHHDR_RX_FIELDS	U(16)
+
+struct ivc_channel_header;
+
+struct ivc {	/* per-channel IVC state, filled in by tegra_ivc_init() */
+	struct ivc_channel_header *rx_channel;	/* header at rx_base */
+	struct ivc_channel_header *tx_channel;	/* header at tx_base */
+	uint32_t w_pos;	/* zeroed by tegra_ivc_init(); presumably next write frame -- confirm */
+	uint32_t r_pos;	/* zeroed by tegra_ivc_init(); presumably next read frame -- confirm */
+	void (*notify)(const struct ivc *);	/* same signature as ivc_notify_function */
+	uint32_t nframes;	/* number of frames in each queue */
+	uint32_t frame_size;	/* bytes per frame; must be a multiple of IVC_ALIGN */
+};
+
+/* callback handler for notify on receiving a response */
+typedef void (* ivc_notify_function)(const struct ivc *);
+
+int32_t tegra_ivc_init(struct ivc *ivc, uintptr_t rx_base, uintptr_t tx_base,
+		uint32_t nframes, uint32_t frame_size,
+		ivc_notify_function notify);
+size_t tegra_ivc_total_queue_size(size_t queue_size);
+size_t tegra_ivc_align(size_t size);
+int32_t tegra_ivc_channel_notified(struct ivc *ivc);
+void tegra_ivc_channel_reset(const struct ivc *ivc);
+int32_t tegra_ivc_write_advance(struct ivc *ivc);
+void *tegra_ivc_write_get_next_frame(const struct ivc *ivc);
+int32_t tegra_ivc_write(struct ivc *ivc, const void *buf, size_t size);
+int32_t tegra_ivc_read_advance(struct ivc *ivc);
+void *tegra_ivc_read_get_next_frame(const struct ivc *ivc);
+int32_t tegra_ivc_read(struct ivc *ivc, void *buf, size_t max_read);
+bool tegra_ivc_tx_empty(const struct ivc *ivc);
+bool tegra_ivc_can_write(const struct ivc *ivc);
+bool tegra_ivc_can_read(const struct ivc *ivc);
+
+#endif /* BPMP_IVC_H */
diff --git a/plat/nvidia/tegra/drivers/flowctrl/flowctrl.c b/plat/nvidia/tegra/drivers/flowctrl/flowctrl.c
new file mode 100644
index 0000000..4c9f4af
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/flowctrl/flowctrl.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2015-2018, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+
+#include <arch_helpers.h>
+#include <cortex_a53.h>
+#include <common/debug.h>
+#include <drivers/delay_timer.h>
+#include <lib/mmio.h>
+
+#include <flowctrl.h>
+#include <lib/utils_def.h>
+#include <pmc.h>
+#include <tegra_def.h>
+
+#define CLK_RST_DEV_L_SET		0x300
+#define CLK_RST_DEV_L_CLR		0x304
+#define  CLK_BPMP_RST			(1 << 1)
+
+#define EVP_BPMP_RESET_VECTOR		0x200
+
+static const uint64_t flowctrl_offset_cpu_csr[4] = {	/* CSR register address, indexed by CPU id */
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_CPU0_CSR),
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_CPU1_CSR),
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_CPU1_CSR + 8),	/* CPU2: derived from the CPU1 offset */
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_CPU1_CSR + 16)	/* CPU3: derived from the CPU1 offset */
+};
+
+static const uint64_t flowctrl_offset_halt_cpu[4] = {	/* HALT events register address, indexed by CPU id */
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_HALT_CPU0_EVENTS),
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_HALT_CPU1_EVENTS),
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_HALT_CPU1_EVENTS + 8),	/* CPU2: derived from the CPU1 offset */
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_HALT_CPU1_EVENTS + 16)	/* CPU3: derived from the CPU1 offset */
+};
+
+static const uint64_t flowctrl_offset_cc4_ctrl[4] = {	/* CC4 control register address, indexed by CPU id */
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_CC4_CORE0_CTRL),
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_CC4_CORE0_CTRL + 4),	/* consecutive 32-bit registers */
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_CC4_CORE0_CTRL + 8),
+	(TEGRA_FLOWCTRL_BASE + FLOWCTRL_CC4_CORE0_CTRL + 12)
+};
+
+static inline void tegra_fc_cc4_ctrl(int cpu_id, uint32_t val)
+{
+	mmio_write_32(flowctrl_offset_cc4_ctrl[cpu_id], val);
+	(void)mmio_read_32(flowctrl_offset_cc4_ctrl[cpu_id]); /* read back; result unused */
+}
+
+static inline void tegra_fc_cpu_csr(int cpu_id, uint32_t val)
+{
+	mmio_write_32(flowctrl_offset_cpu_csr[cpu_id], val);
+	(void)mmio_read_32(flowctrl_offset_cpu_csr[cpu_id]); /* read back; result unused */
+}
+
+static inline void tegra_fc_halt_cpu(int cpu_id, uint32_t val)
+{
+	mmio_write_32(flowctrl_offset_halt_cpu[cpu_id], val);
+	(void)mmio_read_32(flowctrl_offset_halt_cpu[cpu_id]); /* read back; result unused */
+}
+
+static void tegra_fc_prepare_suspend(int cpu_id, uint32_t csr)
+{
+	/* value for the HALT events register of this CPU */
+	const uint32_t halt_events = FLOWCTRL_HALT_GIC_IRQ | FLOWCTRL_HALT_GIC_FIQ |
+				     FLOWCTRL_HALT_LIC_IRQ | FLOWCTRL_HALT_LIC_FIQ |
+				     FLOWCTRL_WAITEVENT;
+	/* common CSR bits for this CPU; the caller-supplied bits are OR-ed in below */
+	const uint32_t csr_bits = FLOWCTRL_CSR_INTR_FLAG | FLOWCTRL_CSR_EVENT_FLAG |
+				  FLOWCTRL_CSR_ENABLE | (FLOWCTRL_WAIT_WFI_BITMAP << cpu_id);
+
+	tegra_fc_halt_cpu(cpu_id, halt_events);
+	tegra_fc_cpu_csr(cpu_id, csr_bits | csr);
+}
+
+/*******************************************************************************
+ * After this, no core can wake from C7 until the action is reverted.
+ * If a wake up event is asserted, the FC state machine will stall until
+ * the action is reverted.
+ ******************************************************************************/
+void tegra_fc_ccplex_pgexit_lock(void)
+{
+	unsigned int core, this_cpu = read_mpidr() & MPIDR_CPU_MASK;
+	uint32_t icept = tegra_fc_read_32(FLOWCTRL_FC_SEQ_INTERCEPT) & ~INTERCEPT_IRQ_PENDING;
+	uint32_t icept_cpu_flags[] = {
+		INTERCEPT_EXIT_PG_CORE0,
+		INTERCEPT_EXIT_PG_CORE1,
+		INTERCEPT_EXIT_PG_CORE2,
+		INTERCEPT_EXIT_PG_CORE3
+	};
+
+	/* set the intercept flag of every core except the current one */
+	for (core = 0; core < ARRAY_SIZE(icept_cpu_flags); core++) {
+		if (core == this_cpu) {
+			/* skip current CPU */
+			continue;
+		}
+
+		/* enable power gate exit intercept locks */
+		icept |= icept_cpu_flags[core];
+	}
+
+	tegra_fc_write_32(FLOWCTRL_FC_SEQ_INTERCEPT, icept);
+	(void)tegra_fc_read_32(FLOWCTRL_FC_SEQ_INTERCEPT);
+}
+
+/*******************************************************************************
+ * Revert the ccplex powergate exit locks
+ ******************************************************************************/
+void tegra_fc_ccplex_pgexit_unlock(void)
+{
+	/* only INTERCEPT_IRQ_PENDING is written, so every per-core lock bit becomes zero */
+	tegra_fc_write_32(FLOWCTRL_FC_SEQ_INTERCEPT, INTERCEPT_IRQ_PENDING);
+	(void)tegra_fc_read_32(FLOWCTRL_FC_SEQ_INTERCEPT); /* read back; result unused */
+}
+
+/*******************************************************************************
+ * Powerdn the current CPU
+ ******************************************************************************/
+void tegra_fc_cpu_powerdn(uint32_t mpidr)
+{
+	int cpu = mpidr & MPIDR_CPU_MASK;	/* CPU index taken from the MPIDR CPU field */
+
+	VERBOSE("CPU%d powering down...\n", cpu);
+	tegra_fc_prepare_suspend(cpu, 0);	/* no extra CSR bits requested */
+}
+
+/*******************************************************************************
+ * Suspend the current CPU cluster
+ ******************************************************************************/
+void tegra_fc_cluster_idle(uint32_t mpidr)
+{
+	int cpu = mpidr & MPIDR_CPU_MASK;	/* CPU index taken from the MPIDR CPU field */
+	uint32_t val;
+
+	VERBOSE("Entering cluster idle state...\n");
+
+	tegra_fc_cc4_ctrl(cpu, 0);
+
+	/* hardware L2 flush is faster for A53 only; here 1 is written when MPIDR Aff1 != 0 */
+	tegra_fc_write_32(FLOWCTRL_L2_FLUSH_CONTROL,
+		!!MPIDR_AFFLVL1_VAL(mpidr));
+
+	/* suspend the CPU cluster */
+	val = FLOWCTRL_PG_CPU_NONCPU << FLOWCTRL_ENABLE_EXT;
+	tegra_fc_prepare_suspend(cpu, val);
+}
+
+/*******************************************************************************
+ * Power down the current CPU cluster
+ ******************************************************************************/
+void tegra_fc_cluster_powerdn(uint32_t mpidr)
+{
+	int cpu = mpidr & MPIDR_CPU_MASK;	/* CPU index taken from the MPIDR CPU field */
+	uint32_t val;
+
+	VERBOSE("Entering cluster powerdn state...\n");
+
+	tegra_fc_cc4_ctrl(cpu, 0);
+
+	/* hardware L2 flush is faster for A53 only; 1 is written when MIDR equals CORTEX_A53_MIDR */
+	tegra_fc_write_32(FLOWCTRL_L2_FLUSH_CONTROL,
+		read_midr() == CORTEX_A53_MIDR);
+
+	/* power down the CPU cluster */
+	val = FLOWCTRL_TURNOFF_CPURAIL << FLOWCTRL_ENABLE_EXT;
+	tegra_fc_prepare_suspend(cpu, val);
+}
+
+/*******************************************************************************
+ * Check if cluster idle or power down state is allowed from this CPU
+ ******************************************************************************/
+bool tegra_fc_is_ccx_allowed(void)
+{
+	unsigned int core, this_cpu = read_mpidr() & MPIDR_CPU_MASK;
+	uint32_t csr;
+	bool others_halted = true;
+
+	for (core = 0; core < ARRAY_SIZE(flowctrl_offset_cpu_csr); core++) {
+		/* skip current CPU */
+		if (core == this_cpu) {
+			continue;
+		}
+
+		/* every other CPU must report a non-zero halt field in its CSR */
+		csr = mmio_read_32(flowctrl_offset_cpu_csr[core]);
+		if ((csr & FLOWCTRL_CSR_HALT_MASK) == 0U) {
+			others_halted = false;
+		}
+	}
+
+	return others_halted;
+}
+
+/*******************************************************************************
+ * Suspend the entire SoC
+ ******************************************************************************/
+void tegra_fc_soc_powerdn(uint32_t mpidr)
+{
+	int cpu = mpidr & MPIDR_CPU_MASK;	/* CPU index taken from the MPIDR CPU field */
+	uint32_t val;
+
+	VERBOSE("Entering SoC powerdn state...\n");
+
+	tegra_fc_cc4_ctrl(cpu, 0);
+
+	tegra_fc_write_32(FLOWCTRL_L2_FLUSH_CONTROL, 1);	/* written unconditionally here */
+
+	val = FLOWCTRL_TURNOFF_CPURAIL << FLOWCTRL_ENABLE_EXT;
+	tegra_fc_prepare_suspend(cpu, val);
+
+	/* replace the HALT value set by tegra_fc_prepare_suspend(): keep only WAITEVENT */
+	tegra_fc_halt_cpu(cpu, FLOWCTRL_WAITEVENT);
+}
+
+/*******************************************************************************
+ * Power up the CPU
+ ******************************************************************************/
+void tegra_fc_cpu_on(int cpu)
+{
+	tegra_fc_cpu_csr(cpu, FLOWCTRL_CSR_ENABLE);	/* CSR: only the ENABLE bit is set */
+	tegra_fc_halt_cpu(cpu, FLOWCTRL_WAITEVENT | FLOWCTRL_HALT_SCLK);
+}
+
+/*******************************************************************************
+ * Power down the CPU
+ ******************************************************************************/
+void tegra_fc_cpu_off(int cpu)
+{
+	uint32_t val;
+
+	/*
+	 * Flow controller powers down the CPU during wfi. The CPU would be
+	 * powered on when it receives any interrupt.
+	 */
+	val = FLOWCTRL_CSR_INTR_FLAG | FLOWCTRL_CSR_EVENT_FLAG |
+		FLOWCTRL_CSR_ENABLE | (FLOWCTRL_WAIT_WFI_BITMAP << cpu);
+	tegra_fc_cpu_csr(cpu, val);
+	tegra_fc_halt_cpu(cpu, FLOWCTRL_WAITEVENT);
+	tegra_fc_cc4_ctrl(cpu, 0);	/* write 0 to this CPU's CC4 control register */
+}
+
+/*******************************************************************************
+ * Inform the BPMP that we have completed the cluster power up
+ ******************************************************************************/
+void tegra_fc_lock_active_cluster(void)
+{
+	uint32_t cluster_ctrl = tegra_fc_read_32(FLOWCTRL_BPMP_CLUSTER_CONTROL);
+
+	/* set the power-on lock bit, keeping the other bits as read */
+	cluster_ctrl |= FLOWCTRL_BPMP_CLUSTER_PWRON_LOCK;
+	tegra_fc_write_32(FLOWCTRL_BPMP_CLUSTER_CONTROL, cluster_ctrl);
+	(void)tegra_fc_read_32(FLOWCTRL_BPMP_CLUSTER_CONTROL); /* read back; result unused */
+}
+
+/*******************************************************************************
+ * Power ON BPMP processor
+ ******************************************************************************/
+void tegra_fc_bpmp_on(uint32_t entrypoint)
+{
+	/* halt BPMP */
+	tegra_fc_write_32(FLOWCTRL_HALT_BPMP_EVENTS, FLOWCTRL_WAITEVENT);
+
+	/* Assert BPMP reset */
+	mmio_write_32(TEGRA_CAR_RESET_BASE + CLK_RST_DEV_L_SET, CLK_BPMP_RST);
+
+	/* Set reset address (stored in PMC_SCRATCH39) */
+	mmio_write_32(TEGRA_EVP_BASE + EVP_BPMP_RESET_VECTOR, entrypoint);
+	while (entrypoint != mmio_read_32(TEGRA_EVP_BASE + EVP_BPMP_RESET_VECTOR))
+		; /* spin (no timeout) until the reset vector register reads back 'entrypoint' */
+
+	/* Wait for 2us before de-asserting the reset signal. */
+	udelay(2);
+
+	/* De-assert BPMP reset */
+	mmio_write_32(TEGRA_CAR_RESET_BASE + CLK_RST_DEV_L_CLR, CLK_BPMP_RST);
+
+	/* Un-halt BPMP */
+	tegra_fc_write_32(FLOWCTRL_HALT_BPMP_EVENTS, 0);
+}
+
+/*******************************************************************************
+ * Power OFF BPMP processor
+ ******************************************************************************/
+void tegra_fc_bpmp_off(void)
+{
+	/* halt BPMP */
+	tegra_fc_write_32(FLOWCTRL_HALT_BPMP_EVENTS, FLOWCTRL_WAITEVENT);
+
+	/* Assert BPMP reset (it is left asserted by this function) */
+	mmio_write_32(TEGRA_CAR_RESET_BASE + CLK_RST_DEV_L_SET, CLK_BPMP_RST);
+
+	/* Clear reset address */
+	mmio_write_32(TEGRA_EVP_BASE + EVP_BPMP_RESET_VECTOR, 0);
+	while (0 != mmio_read_32(TEGRA_EVP_BASE + EVP_BPMP_RESET_VECTOR))
+		; /* spin (no timeout) until the reset vector register reads back zero */
+}
+
+/*******************************************************************************
+ * Route legacy FIQ to the GICD
+ ******************************************************************************/
+void tegra_fc_enable_fiq_to_ccplex_routing(void)
+{
+	uint32_t dbg_qual = tegra_fc_read_32(FLOW_CTLR_FLOW_DBG_QUAL);
+	/* set the bit to pass FIQs to the GICD, keeping the other bits as read */
+	dbg_qual |= FLOWCTRL_FIQ2CCPLEX_ENABLE;
+	tegra_fc_write_32(FLOW_CTLR_FLOW_DBG_QUAL, dbg_qual);
+}
+
+/*******************************************************************************
+ * Disable routing legacy FIQ to the GICD
+ ******************************************************************************/
+void tegra_fc_disable_fiq_to_ccplex_routing(void)
+{
+	uint32_t dbg_qual = tegra_fc_read_32(FLOW_CTLR_FLOW_DBG_QUAL);
+	/* clear the bit to stop passing FIQs to the GICD, keeping the other bits as read */
+	dbg_qual &= ~FLOWCTRL_FIQ2CCPLEX_ENABLE;
+	tegra_fc_write_32(FLOW_CTLR_FLOW_DBG_QUAL, dbg_qual);
+}
diff --git a/plat/nvidia/tegra/drivers/gpcdma/gpcdma.c b/plat/nvidia/tegra/drivers/gpcdma/gpcdma.c
new file mode 100644
index 0000000..d68cdfd
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/gpcdma/gpcdma.c
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2017, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <arch_helpers.h>
+#include <common/debug.h>
+#include <drivers/delay_timer.h>
+#include <errno.h>
+#include <gpcdma.h>
+#include <lib/mmio.h>
+#include <lib/utils_def.h>
+#include <platform_def.h>
+#include <stdbool.h>
+#include <tegra_def.h>
+
+/* DMA channel registers */
+#define DMA_CH_CSR				U(0x0)
+#define DMA_CH_CSR_WEIGHT_SHIFT			U(10)
+#define DMA_CH_CSR_XFER_MODE_SHIFT		U(21)
+#define DMA_CH_CSR_DMA_MODE_MEM2MEM		U(4)
+#define DMA_CH_CSR_DMA_MODE_FIXEDPATTERN	U(6)
+#define DMA_CH_CSR_IRQ_MASK_ENABLE		(U(1) << 15)
+#define DMA_CH_CSR_RUN_ONCE			(U(1) << 27)
+#define DMA_CH_CSR_ENABLE			(U(1) << 31)
+
+#define DMA_CH_STAT				U(0x4)
+#define DMA_CH_STAT_BUSY			(U(1) << 31)
+
+#define DMA_CH_SRC_PTR				U(0xC)
+
+#define DMA_CH_DST_PTR				U(0x10)
+
+#define DMA_CH_HI_ADR_PTR			U(0x14)
+#define DMA_CH_HI_ADR_PTR_SRC_MASK		U(0xFF)
+#define DMA_CH_HI_ADR_PTR_DST_SHIFT		U(16)
+#define DMA_CH_HI_ADR_PTR_DST_MASK		U(0xFF)
+
+#define DMA_CH_MC_SEQ				U(0x18)
+#define DMA_CH_MC_SEQ_REQ_CNT_SHIFT		U(25)
+#define DMA_CH_MC_SEQ_REQ_CNT_VAL		U(0x10)
+#define DMA_CH_MC_SEQ_BURST_SHIFT		U(23)
+#define DMA_CH_MC_SEQ_BURST_16_WORDS		U(0x3)
+
+#define DMA_CH_WORD_COUNT			U(0x20)
+#define DMA_CH_FIXED_PATTERN			U(0x34)
+#define DMA_CH_TZ				U(0x38)
+#define DMA_CH_TZ_ACCESS_ENABLE			U(0)
+#define DMA_CH_TZ_ACCESS_DISABLE		U(3)
+
+#define MAX_TRANSFER_SIZE			(1U*1024U*1024U*1024U)	/* 1GB */
+#define GPCDMA_TIMEOUT_MS			U(100)
+#define GPCDMA_RESET_BIT			(U(1) << 1)
+
+static bool init_done;
+
+static void tegra_gpcdma_write32(uint32_t offset, uint32_t val)
+{
+	mmio_write_32(TEGRA_GPCDMA_BASE + offset, val);	/* offset is relative to TEGRA_GPCDMA_BASE */
+}
+
+static uint32_t tegra_gpcdma_read32(uint32_t offset)
+{
+	return mmio_read_32(TEGRA_GPCDMA_BASE + offset);	/* offset is relative to TEGRA_GPCDMA_BASE */
+}
+
+static void tegra_gpcdma_init(void)
+{
+	/* assert reset for DMA engine */
+	mmio_write_32(TEGRA_CAR_RESET_BASE + TEGRA_GPCDMA_RST_SET_REG_OFFSET,
+		      GPCDMA_RESET_BIT);
+
+	udelay(2);	/* keep the reset asserted for 2us before releasing it */
+
+	/* de-assert reset for DMA engine */
+	mmio_write_32(TEGRA_CAR_RESET_BASE + TEGRA_GPCDMA_RST_CLR_REG_OFFSET,
+		      GPCDMA_RESET_BIT);
+}
+
+/*
+ * Run one transfer of num_bytes (non-zero multiple of 4, <= MAX_TRANSFER_SIZE)
+ * in 'mode' and poll for completion. Failures are not returned to the caller.
+ */
+static void tegra_gpcdma_memcpy_priv(uint64_t dst_addr, uint64_t src_addr,
+				     uint32_t num_bytes, uint32_t mode)
+{
+	uint32_t val, timeout = 0;
+	int32_t ret = 0;
+
+	/* sanity check byte count: zero would wrap the word count below */
+	if ((num_bytes == 0U) || (num_bytes > MAX_TRANSFER_SIZE) ||
+	    ((num_bytes & 0x3U) != U(0))) {
+		ret = -EINVAL;
+	}
+
+	/* initialise GPCDMA block */
+	if (!init_done) {
+		tegra_gpcdma_init();
+		init_done = true;
+	}
+
+	/* make sure channel isn't busy */
+	val = tegra_gpcdma_read32(DMA_CH_STAT);
+	if ((val & DMA_CH_STAT_BUSY) == DMA_CH_STAT_BUSY) {
+		ERROR("DMA channel is busy\n");
+		ret = -EBUSY;
+	}
+
+	if (ret == 0) {
+		/* disable any DMA transfers */
+		tegra_gpcdma_write32(DMA_CH_CSR, 0);
+
+		/* enable DMA access to TZDRAM */
+		tegra_gpcdma_write32(DMA_CH_TZ, DMA_CH_TZ_ACCESS_ENABLE);
+
+		/* configure MC sequencer */
+		val = (DMA_CH_MC_SEQ_REQ_CNT_VAL << DMA_CH_MC_SEQ_REQ_CNT_SHIFT) |
+		      (DMA_CH_MC_SEQ_BURST_16_WORDS << DMA_CH_MC_SEQ_BURST_SHIFT);
+		tegra_gpcdma_write32(DMA_CH_MC_SEQ, val);
+
+		/* reset fixed pattern */
+		tegra_gpcdma_write32(DMA_CH_FIXED_PATTERN, 0);
+
+		/* populate src and dst address registers */
+		tegra_gpcdma_write32(DMA_CH_SRC_PTR, (uint32_t)src_addr);
+		tegra_gpcdma_write32(DMA_CH_DST_PTR, (uint32_t)dst_addr);
+
+		val = (uint32_t)((src_addr >> 32) & DMA_CH_HI_ADR_PTR_SRC_MASK);
+		val |= (uint32_t)(((dst_addr >> 32) & DMA_CH_HI_ADR_PTR_DST_MASK) <<
+			DMA_CH_HI_ADR_PTR_DST_SHIFT);
+		tegra_gpcdma_write32(DMA_CH_HI_ADR_PTR, val);
+
+		/* transfer size (in words), programmed as count minus one */
+		tegra_gpcdma_write32(DMA_CH_WORD_COUNT, ((num_bytes >> 2) - 1U));
+
+		/* populate value for CSR */
+		val = (mode << DMA_CH_CSR_XFER_MODE_SHIFT) |
+		      DMA_CH_CSR_RUN_ONCE | (U(1) << DMA_CH_CSR_WEIGHT_SHIFT) |
+		      DMA_CH_CSR_IRQ_MASK_ENABLE;
+		tegra_gpcdma_write32(DMA_CH_CSR, val);
+
+		/* enable transfer */
+		val = tegra_gpcdma_read32(DMA_CH_CSR);
+		val |= DMA_CH_CSR_ENABLE;
+		tegra_gpcdma_write32(DMA_CH_CSR, val);
+
+		/* wait till transfer completes */
+		do {
+			/* read the status */
+			val = tegra_gpcdma_read32(DMA_CH_STAT);
+			if ((val & DMA_CH_STAT_BUSY) != DMA_CH_STAT_BUSY) {
+				break;
+			}
+			mdelay(1);
+			timeout++;
+		} while (timeout < GPCDMA_TIMEOUT_MS);
+
+		/* flag timeout error */
+		if (timeout == GPCDMA_TIMEOUT_MS) {
+			ERROR("DMA transfer timed out\n");
+		}
+
+		dsbsy();
+		/* disable DMA access to TZDRAM */
+		tegra_gpcdma_write32(DMA_CH_TZ, DMA_CH_TZ_ACCESS_DISABLE);
+		isb();
+	}
+}
+
+/*******************************************************************************
+ * Memcpy using GPCDMA block (Mem2Mem copy); nothing is returned, errors are only logged
+ ******************************************************************************/
+void tegra_gpcdma_memcpy(uint64_t dst_addr, uint64_t src_addr,
+			 uint32_t num_bytes)
+{
+	tegra_gpcdma_memcpy_priv(dst_addr, src_addr, num_bytes,
+				 DMA_CH_CSR_DMA_MODE_MEM2MEM);
+}
+
+/*******************************************************************************
+ * Zero-fill using GPCDMA block (fixed pattern mode; the pattern register is reset to 0)
+ ******************************************************************************/
+void tegra_gpcdma_zeromem(uint64_t dst_addr, uint32_t num_bytes)
+{
+	tegra_gpcdma_memcpy_priv(dst_addr, 0, num_bytes,
+				 DMA_CH_CSR_DMA_MODE_FIXEDPATTERN);
+}
diff --git a/plat/nvidia/tegra/drivers/memctrl/memctrl_v1.c b/plat/nvidia/tegra/drivers/memctrl/memctrl_v1.c
new file mode 100644
index 0000000..b3dcd2a
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/memctrl/memctrl_v1.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2015-2019, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <arch_helpers.h>
+#include <common/debug.h>
+#include <lib/mmio.h>
+#include <lib/utils.h>
+#include <lib/xlat_tables/xlat_tables_v2.h>
+
+#include <memctrl.h>
+#include <memctrl_v1.h>
+#include <tegra_def.h>
+
+/* Video Memory base and size (live values) */
+static uint64_t video_mem_base;
+static uint64_t video_mem_size;
+
+/*
+ * Init SMMU and re-program the video memory carveout cached in this file.
+ */
+void tegra_memctrl_setup(void)
+{
+	/*
+	 * Enable SMMU translation for all MC engines, flush the PTC/TLB,
+	 * enable the SMMU and write back the cached video memory carveout
+	 */
+	INFO("Tegra Memory Controller (v1)\n");
+
+	/* allow translations for all MC engines */
+	tegra_mc_write_32(MC_SMMU_TRANSLATION_ENABLE_0_0,
+			(unsigned int)MC_SMMU_TRANSLATION_ENABLE);
+	tegra_mc_write_32(MC_SMMU_TRANSLATION_ENABLE_1_0,
+			(unsigned int)MC_SMMU_TRANSLATION_ENABLE);
+	tegra_mc_write_32(MC_SMMU_TRANSLATION_ENABLE_2_0,
+			(unsigned int)MC_SMMU_TRANSLATION_ENABLE);
+	tegra_mc_write_32(MC_SMMU_TRANSLATION_ENABLE_3_0,
+			(unsigned int)MC_SMMU_TRANSLATION_ENABLE);
+	tegra_mc_write_32(MC_SMMU_TRANSLATION_ENABLE_4_0,
+			(unsigned int)MC_SMMU_TRANSLATION_ENABLE);
+
+	tegra_mc_write_32(MC_SMMU_ASID_SECURITY_0, MC_SMMU_ASID_SECURITY);
+
+	tegra_mc_write_32(MC_SMMU_TLB_CONFIG_0, MC_SMMU_TLB_CONFIG_0_RESET_VAL);
+	tegra_mc_write_32(MC_SMMU_PTC_CONFIG_0, MC_SMMU_PTC_CONFIG_0_RESET_VAL);
+
+	/* flush PTC and TLB */
+	tegra_mc_write_32(MC_SMMU_PTC_FLUSH_0, MC_SMMU_PTC_FLUSH_ALL);
+	(void)tegra_mc_read_32(MC_SMMU_CONFIG_0); /* read to flush writes */
+	tegra_mc_write_32(MC_SMMU_TLB_FLUSH_0, MC_SMMU_TLB_FLUSH_ALL);
+
+	/* enable SMMU */
+	tegra_mc_write_32(MC_SMMU_CONFIG_0,
+			  MC_SMMU_CONFIG_0_SMMU_ENABLE_ENABLE);
+	(void)tegra_mc_read_32(MC_SMMU_CONFIG_0); /* read to flush writes */
+
+	/* video memory carveout (both statics are zero until videomem_setup runs) */
+	tegra_mc_write_32(MC_VIDEO_PROTECT_BASE_HI,
+			  (uint32_t)(video_mem_base >> 32));
+	tegra_mc_write_32(MC_VIDEO_PROTECT_BASE_LO, (uint32_t)video_mem_base);
+	tegra_mc_write_32(MC_VIDEO_PROTECT_SIZE_MB, video_mem_size);	/* kept in MB */
+}
+
+/*
+ * Restore Memory Controller settings after "System Suspend"
+ */
+void tegra_memctrl_restore_settings(void)
+{
+	tegra_memctrl_setup();	/* restore is the same sequence as the boot-time setup */
+}
+
+/*
+ * Secure the BL31 DRAM aperture.
+ *
+ * phys_base = physical base of TZDRAM aperture
+ * size_in_bytes = size of aperture in bytes
+ */
+void tegra_memctrl_tzdram_setup(uint64_t phys_base, uint32_t size_in_bytes)
+{
+	/*
+	 * Setup the Memory controller to allow only secure accesses to
+	 * the TZDRAM carveout
+	 */
+	INFO("Configuring TrustZone DRAM Memory Carveout\n");
+
+	tegra_mc_write_32(MC_SECURITY_CFG0_0, phys_base);	/* NOTE(review): 64-bit base passed to a 32-bit write -- confirm */
+	tegra_mc_write_32(MC_SECURITY_CFG1_0, size_in_bytes >> 20);	/* size is programmed in MB */
+}
+
+static void tegra_clear_videomem(uintptr_t non_overlap_area_start,
+				 unsigned long long non_overlap_area_size)
+{
+	int ret;
+
+	/*
+	 * Identity-map the NS memory (non-cacheable), zero it and then unmap it.
+	 */
+	ret = mmap_add_dynamic_region(non_overlap_area_start, /* PA */
+				non_overlap_area_start, /* VA */
+				non_overlap_area_size, /* size */
+				MT_NS | MT_RW | MT_EXECUTE_NEVER |
+				MT_NON_CACHEABLE); /* attrs */
+	assert(ret == 0);	/* a mapping failure is only caught by this assert */
+
+	zeromem((void *)non_overlap_area_start, non_overlap_area_size);
+	flush_dcache_range(non_overlap_area_start, non_overlap_area_size);
+
+	mmap_remove_dynamic_region(non_overlap_area_start,
+		non_overlap_area_size);	/* NOTE(review): return value is not checked here */
+}
+
+/*
+ * Program the Video Memory carveout region
+ *
+ * phys_base = physical base of aperture
+ * size_in_bytes = size of aperture in bytes
+ */
+void tegra_memctrl_videomem_setup(uint64_t phys_base, uint32_t size_in_bytes)
+{
+	uintptr_t vmem_end_old = video_mem_base + (video_mem_size << 20);	/* video_mem_size is in MB */
+	uintptr_t vmem_end_new = phys_base + size_in_bytes;
+	unsigned long long non_overlap_area_size;
+
+	/*
+	 * Setup the Memory controller to restrict CPU accesses to the Video
+	 * Memory region
+	 */
+	INFO("Configuring Video Memory Carveout\n");
+
+	/*
+	 * No carveout recorded yet (base is 0): nothing to scrub, program directly.
+	 */
+	if (video_mem_base == 0)
+		goto done;
+
+	/*
+	 * Clear the old regions now being exposed. The following cases
+	 * can occur -
+	 *
+	 * 1. clear whole old region (no overlap with new region)
+	 * 2. clear old sub-region below new base
+	 * 3. clear old sub-region above new end
+	 */
+	INFO("Cleaning previous Video Memory Carveout\n");
+
+	if (phys_base > vmem_end_old || video_mem_base > vmem_end_new) {
+		tegra_clear_videomem(video_mem_base, video_mem_size << 20);
+	} else {
+		if (video_mem_base < phys_base) {
+			non_overlap_area_size = phys_base - video_mem_base;
+			tegra_clear_videomem(video_mem_base, non_overlap_area_size);
+		}
+		if (vmem_end_old > vmem_end_new) {
+			non_overlap_area_size = vmem_end_old - vmem_end_new;
+			tegra_clear_videomem(vmem_end_new, non_overlap_area_size);
+		}
+	}
+
+done:
+	tegra_mc_write_32(MC_VIDEO_PROTECT_BASE_HI, (uint32_t)(phys_base >> 32));
+	tegra_mc_write_32(MC_VIDEO_PROTECT_BASE_LO, (uint32_t)phys_base);
+	tegra_mc_write_32(MC_VIDEO_PROTECT_SIZE_MB, size_in_bytes >> 20);
+
+	/* store new values (size is kept in MB, any sub-MB remainder is dropped) */
+	video_mem_base = phys_base;
+	video_mem_size = size_in_bytes >> 20;
+}
+
+/*
+ * During boot, USB3 and flash media (SDMMC/SATA) devices need access to
+ * IRAM. Because these clients connect to the MC and do not have a direct
+ * path to the IRAM, the MC implements AHB redirection during boot to allow
+ * path to IRAM. In this mode, accesses to a programmed memory address aperture
+ * are directed to the AHB bus, allowing access to the IRAM. The AHB aperture
+ * is defined by the IRAM_BASE_LO and IRAM_BASE_HI registers, which are
+ * initialized to disable this aperture.
+ *
+ * Once bootup is complete, we must program IRAM base to 0xffffffff and
+ * IRAM top to 0x00000000, thus disabling access to IRAM. DRAM is then
+ * potentially accessible in this address range. These aperture registers
+ * also have an access_control/lock bit. After disabling the aperture, the
+ * access_control register should be programmed to lock the registers.
+ */
+void tegra_memctrl_disable_ahb_redirection(void)
+{
+	/* program the aperture registers: base 0xffffffff above top 0 disables the aperture */
+	tegra_mc_write_32(MC_IRAM_BASE_LO, 0xFFFFFFFF);
+	tegra_mc_write_32(MC_IRAM_TOP_LO, 0);
+	tegra_mc_write_32(MC_IRAM_BASE_TOP_HI, 0);
+
+	/* lock the aperture registers */
+	tegra_mc_write_32(MC_IRAM_REG_CTRL, MC_DISABLE_IRAM_CFG_WRITES);
+}
+
+void tegra_memctrl_clear_pending_interrupts(void)
+{
+	uint32_t mcerr;
+
+	/* check if there are any pending interrupts */
+	mcerr = mmio_read_32(TEGRA_MC_BASE + MC_INTSTATUS);
+
+	if (mcerr != (uint32_t)0U) { /* should not see error here */
+		WARN("MC_INTSTATUS = 0x%x (should be zero)\n", mcerr);
+		mmio_write_32((TEGRA_MC_BASE + MC_INTSTATUS),  mcerr); /* write the set bits back; presumably write-1-to-clear -- confirm */
+	}
+}
diff --git a/plat/nvidia/tegra/drivers/memctrl/memctrl_v2.c b/plat/nvidia/tegra/drivers/memctrl/memctrl_v2.c
new file mode 100644
index 0000000..0644fd2
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/memctrl/memctrl_v2.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 2015-2017, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2019-2020, NVIDIA Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <arch_helpers.h>
+#include <common/bl_common.h>
+#include <common/debug.h>
+#include <lib/mmio.h>
+#include <lib/utils.h>
+#include <lib/xlat_tables/xlat_tables_v2.h>
+
+#include <mce.h>
+#include <memctrl.h>
+#include <memctrl_v2.h>
+#include <smmu.h>
+#include <tegra_def.h>
+#include <tegra_platform.h>
+#include <tegra_private.h>
+
+/* Video Memory base and size (live values) */
+static uint64_t video_mem_base;
+static uint64_t video_mem_size_mb;
+
+/*
+ * Init Memory controller during boot: SMMU init, platform hook, then SMMU bypass config.
+ */
+void tegra_memctrl_setup(void)
+{
+	INFO("Tegra Memory Controller (v2)\n");
+
+	/* Initialize the System memory management unit */
+	tegra_smmu_init();
+
+	/* allow platforms to program custom memory controller settings */
+	plat_memctrl_setup();
+
+	/*
+	 * All requests at boot time, and certain requests during
+	 * normal run time, are physically addressed and must bypass
+	 * the SMMU. The client hub logic implements a hardware bypass
+	 * path around the Translation Buffer Units (TBU). During
+	 * boot-time, the SMMU_BYPASS_CTRL register (which defaults to
+	 * TBU_BYPASS mode) will be used to steer all requests around
+	 * the uninitialized TBUs. During normal operation, this register
+	 * is locked into TBU_BYPASS_SID config, which routes requests
+	 * with special StreamID 0x7f on the bypass path and all others
+	 * through the selected TBU. This is done to disable SMMU Bypass
+	 * mode, as it could be used to circumvent SMMU security checks.
+	 */
+	tegra_mc_write_32(MC_SMMU_BYPASS_CONFIG,
+			  MC_SMMU_BYPASS_CONFIG_SETTINGS);
+}
+
+/*
+ * Restore Memory Controller settings after "System Suspend"
+ */
+void tegra_memctrl_restore_settings(void)
+{
+	/* restore platform's memory controller settings */
+	plat_memctrl_restore();
+
+	/* video memory carveout region: skipped while no base has been recorded; writes are read back in asserts */
+	if (video_mem_base != 0ULL) {
+		tegra_mc_write_32(MC_VIDEO_PROTECT_BASE_LO,
+				  (uint32_t)video_mem_base);
+		assert(tegra_mc_read_32(MC_VIDEO_PROTECT_BASE_LO)
+			 == (uint32_t)video_mem_base);
+		tegra_mc_write_32(MC_VIDEO_PROTECT_BASE_HI,
+				  (uint32_t)(video_mem_base >> 32));
+		assert(tegra_mc_read_32(MC_VIDEO_PROTECT_BASE_HI)
+			 == (uint32_t)(video_mem_base >> 32));
+		tegra_mc_write_32(MC_VIDEO_PROTECT_SIZE_MB,
+				  (uint32_t)video_mem_size_mb);
+		assert(tegra_mc_read_32(MC_VIDEO_PROTECT_SIZE_MB)
+			 == (uint32_t)video_mem_size_mb);
+
+		/*
+		 * MCE propagates the VideoMem configuration values across the
+		 * CCPLEX.
+		 */
+		mce_update_gsc_videomem();
+	}
+}
+
+/*
+ * Secure the BL31 DRAM aperture.
+ *
+ * phys_base = physical base of TZDRAM aperture
+ * size_in_bytes = size of aperture in bytes
+ */
+void tegra_memctrl_tzdram_setup(uint64_t phys_base, uint32_t size_in_bytes)
+{
+	/*
+	 * Perform platform specific steps; this driver does nothing else here.
+	 */
+	plat_memctrl_tzdram_setup(phys_base, size_in_bytes);
+}
+
+/*
+ * Secure the BL31 TZRAM aperture (no-op in this driver).
+ *
+ * phys_base = physical base of TZRAM aperture (ignored)
+ * size_in_bytes = size of aperture in bytes (ignored)
+ */
+void tegra_memctrl_tzram_setup(uint64_t phys_base, uint32_t size_in_bytes)
+{
+	; /* do nothing */
+}
+
+/*
+ * Save the MC context table at mc_ctx_addr (TZDRAM) before "System Suspend"
+ */
+void tegra_mc_save_context(uint64_t mc_ctx_addr)
+{
+	uint32_t i, num_entries = 0;
+	mc_regs_t *mc_ctx_regs;
+	const plat_params_from_bl2_t *params_from_bl2 = bl31_get_plat_params();
+	uint64_t tzdram_base = params_from_bl2->tzdram_base;
+	uint64_t tzdram_end = tzdram_base + params_from_bl2->tzdram_size;
+
+	assert((mc_ctx_addr >= tzdram_base) && (mc_ctx_addr <= tzdram_end));
+
+	/* get MC context table */
+	mc_ctx_regs = plat_memctrl_get_sys_suspend_ctx();
+	assert(mc_ctx_regs != NULL);
+
+	/*
+	 * mc_ctx_regs[0].val contains the size of the context table minus
+	 * the last entry. Sanity check the table size before we start with
+	 * the context save operation.
+	 */
+	while (mc_ctx_regs[num_entries].reg != 0xFFFFFFFFU) {
+		num_entries++;
+	}
+
+	/* panic if the sizes do not match */
+	if (num_entries != mc_ctx_regs[0].val) {
+		ERROR("MC context size mismatch!\n");
+		panic();
+	}
+
+	/* save MC register values (entry 0 holds the count, so start at 1) */
+	for (i = 1U; i < num_entries; i++) {
+		mc_ctx_regs[i].val = mmio_read_32(mc_ctx_regs[i].reg);
+	}
+
+	/* increment by 1 to take care of the last entry */
+	num_entries++;
+
+	/* Save MC config settings */
+	(void)memcpy((void *)mc_ctx_addr, mc_ctx_regs,
+			sizeof(mc_regs_t) * num_entries);
+
+	/* save the MC table address */
+	mmio_write_32(TEGRA_SCRATCH_BASE + SCRATCH_MC_TABLE_ADDR_LO,
+		(uint32_t)mc_ctx_addr);
+	assert(mmio_read_32(TEGRA_SCRATCH_BASE + SCRATCH_MC_TABLE_ADDR_LO)
+		== (uint32_t)mc_ctx_addr);
+	mmio_write_32(TEGRA_SCRATCH_BASE + SCRATCH_MC_TABLE_ADDR_HI,
+		(uint32_t)(mc_ctx_addr >> 32));
+	assert(mmio_read_32(TEGRA_SCRATCH_BASE + SCRATCH_MC_TABLE_ADDR_HI)
+		== (uint32_t)(mc_ctx_addr >> 32));
+}
+
+static void tegra_lock_videomem_nonoverlap(uint64_t phys_base,
+					   uint64_t size_in_bytes)
+{
+	uint32_t index;
+	uint64_t total_128kb_blocks = size_in_bytes >> 17;
+	uint64_t residual_4kb_blocks = (size_in_bytes & (uint32_t)0x1FFFF) >> 12;
+	uint64_t val;
+
+	/*
+	 * Reset the access configuration registers to restrict access to
+	 * old Videomem aperture
+	 */
+	for (index = MC_VIDEO_PROTECT_CLEAR_ACCESS_CFG0;
+	     index < ((uint32_t)MC_VIDEO_PROTECT_CLEAR_ACCESS_CFG0 + (uint32_t)MC_GSC_CONFIG_REGS_SIZE);
+	     index += 4U) {
+		tegra_mc_write_32(index, 0);
+	}
+
+	/*
+	 * Set the base. It must be 4k aligned, at least.
+	 */
+	assert((phys_base & (uint64_t)0xFFF) == 0U);
+	tegra_mc_write_32(MC_VIDEO_PROTECT_CLEAR_BASE_LO, (uint32_t)phys_base);
+	tegra_mc_write_32(MC_VIDEO_PROTECT_CLEAR_BASE_HI,
+		(uint32_t)(phys_base >> 32) & (uint32_t)MC_GSC_BASE_HI_MASK);
+
+	/*
+	 * Set the aperture size
+	 *
+	 * total size = (number of 128KB blocks) + (number of remaining 4KB
+	 * blocks)
+	 *
+	 */
+	val = (uint32_t)((residual_4kb_blocks << MC_GSC_SIZE_RANGE_4KB_SHIFT) |
+			 total_128kb_blocks);
+	tegra_mc_write_32(MC_VIDEO_PROTECT_CLEAR_SIZE, (uint32_t)val);
+
+	/*
+	 * Lock the configuration settings by enabling TZ-only lock and
+	 * locking the configuration against any future changes from NS
+	 * world.
+	 */
+	tegra_mc_write_32(MC_VIDEO_PROTECT_CLEAR_CFG,
+			  (uint32_t)MC_GSC_ENABLE_TZ_LOCK_BIT);
+
+	/*
+	 * NOTE(review): nothing here asks the MCE to propagate the GSC
+	 * configuration across the CCPLEX; presumably the caller does -- confirm.
+	 */
+}
+
+static void tegra_unlock_videomem_nonoverlap(void)
+{
+	/*
+	 * Release the region: zero the base (LO, then HI) and then the size.
+	 */
+	tegra_mc_write_32(MC_VIDEO_PROTECT_CLEAR_BASE_LO, 0U);
+	tegra_mc_write_32(MC_VIDEO_PROTECT_CLEAR_BASE_HI, 0U);
+	tegra_mc_write_32(MC_VIDEO_PROTECT_CLEAR_SIZE, 0U);
+}
+
+static void tegra_clear_videomem(uintptr_t non_overlap_area_start,
+				 unsigned long long non_overlap_area_size)
+{
+	int ret;
+
+	INFO("Cleaning previous Video Memory Carveout\n");
+	/* Map the NS memory (PA == VA), clean it, unmap it; errors are fatal */
+	ret = mmap_add_dynamic_region(non_overlap_area_start, /* PA */
+				non_overlap_area_start, /* VA */
+				non_overlap_area_size, /* size */
+				MT_DEVICE | MT_RW | MT_NS); /* attrs */
+	if (ret != 0) {
+		panic();
+	}
+	zeromem((void *)non_overlap_area_start, non_overlap_area_size);
+	flush_dcache_range(non_overlap_area_start, non_overlap_area_size);
+
+	ret = mmap_remove_dynamic_region(non_overlap_area_start,
+		non_overlap_area_size);
+	if (ret != 0) {
+		panic();
+	}
+}
+
+static void tegra_clear_videomem_nonoverlap(uintptr_t phys_base,
+		unsigned long size_in_bytes)
+{
+	uintptr_t old_end = video_mem_base + (video_mem_size_mb << 20);
+	uintptr_t new_end = phys_base + size_in_bytes;
+
+	/*
+	 * Clear the parts of the old region that the new region leaves
+	 * exposed.
+	 *
+	 * Case 1: no overlap with the new region - clear the whole old region.
+	 */
+	if ((phys_base > old_end) || (video_mem_base > new_end)) {
+		tegra_clear_videomem(video_mem_base,
+				     video_mem_size_mb << 20U);
+		return;
+	}
+
+	/* Case 2: clear the old sub-region below the new base. */
+	if (video_mem_base < phys_base) {
+		tegra_clear_videomem(video_mem_base,
+				     phys_base - video_mem_base);
+	}
+
+	/* Case 3: clear the old sub-region above the new end. */
+	if (old_end > new_end) {
+		tegra_clear_videomem(new_end, old_end - new_end);
+	}
+}
+
+/*
+ * Program the Video Memory carveout region
+ *
+ * phys_base = physical base of aperture
+ * size_in_bytes = size of aperture in bytes (programmed in whole MBs)
+ */
+void tegra_memctrl_videomem_setup(uint64_t phys_base, uint32_t size_in_bytes)
+{
+	/*
+	 * Setup the Memory controller to restrict CPU accesses to the Video
+	 * Memory region; a previously programmed region is scrubbed as well
+	 */
+
+	INFO("Configuring Video Memory Carveout\n");
+
+	if (video_mem_base != 0U) {
+		/*
+		 * Lock the previous carveout, whose non overlapping part is
+		 * cleared below, so that other masters do not accidentally
+		 * write to it. The memory is unlocked once that part is
+		 * cleared and the new memory settings take effect.
+		 */
+		tegra_lock_videomem_nonoverlap(video_mem_base,
+			video_mem_size_mb << 20);
+	}
+
+	/* program the Videomem aperture; the size register is in MB units */
+	tegra_mc_write_32(MC_VIDEO_PROTECT_BASE_LO, (uint32_t)phys_base);
+	tegra_mc_write_32(MC_VIDEO_PROTECT_BASE_HI,
+			  (uint32_t)(phys_base >> 32));
+	tegra_mc_write_32(MC_VIDEO_PROTECT_SIZE_MB, size_in_bytes >> 20);
+
+	/* Redundancy check: read back the Video Protect settings */
+	assert(tegra_mc_read_32(MC_VIDEO_PROTECT_BASE_LO)
+		 == (uint32_t)phys_base);
+	assert(tegra_mc_read_32(MC_VIDEO_PROTECT_BASE_HI)
+		 == (uint32_t)(phys_base >> 32));
+	assert(tegra_mc_read_32(MC_VIDEO_PROTECT_SIZE_MB)
+		 == (size_in_bytes >> 20));
+
+	/*
+	 * MCE propagates the VideoMem configuration values across the
+	 * CCPLEX. The return value is deliberately ignored here.
+	 */
+	(void)mce_update_gsc_videomem();
+
+	/* Clear the non-overlapping memory, then release the lock */
+	if (video_mem_base != 0U) {
+		tegra_clear_videomem_nonoverlap(phys_base, size_in_bytes);
+		tegra_unlock_videomem_nonoverlap();
+	}
+
+	/* remember the new region (size in MB) for the next call */
+	video_mem_base = phys_base;
+	video_mem_size_mb = (uint64_t)size_in_bytes >> 20;
+}
+
+/*
+ * AHB redirection exists only for v1 of the Tegra Memory Controller, so
+ * there is nothing to disable here.
+ */
+void tegra_memctrl_disable_ahb_redirection(void)
+{
+}
+
+void tegra_memctrl_clear_pending_interrupts(void)
+{
+	/* intentionally empty */
+}
diff --git a/plat/nvidia/tegra/drivers/pmc/pmc.c b/plat/nvidia/tegra/drivers/pmc/pmc.c
new file mode 100644
index 0000000..e70e7a6
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/pmc/pmc.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2015, ARM Limited and Contributors. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+
+#include <arch_helpers.h>
+#include <common/debug.h>
+#include <lib/mmio.h>
+
+#include <pmc.h>
+#include <tegra_def.h>
+
+#define RESET_ENABLE	0x10U
+
+/* PMC module IDs used during the power ungate procedure, indexed by CPU */
+static const uint32_t pmc_cpu_powergate_id[4] = {
+	[0] = 14U, /* CPU 0 */
+	[1] = 9U, /* CPU 1 */
+	[2] = 10U, /* CPU 2 */
+	[3] = 11U /* CPU 3 */
+};
+
+/*******************************************************************************
+ * Power ungate CPU to start the boot process. CPU reset vectors must be
+ * populated before calling this function.
+ *
+ * 'cpu' must be a valid index into pmc_cpu_powergate_id[] (0-3).
+ ******************************************************************************/
+void tegra_pmc_cpu_on(int32_t cpu)
+{
+	uint32_t val;
+
+	/* 'cpu' is used as an array index below; reject out-of-range values */
+	assert((cpu >= 0) &&
+	       ((uint32_t)cpu < (uint32_t)(sizeof(pmc_cpu_powergate_id) /
+					   sizeof(pmc_cpu_powergate_id[0]))));
+
+	/* Check if CPU is already power ungated */
+	val = tegra_pmc_read_32(PMC_PWRGATE_STATUS);
+	if ((val & (1U << pmc_cpu_powergate_id[cpu])) == 0U) {
+		/*
+		 * The PMC deasserts the START bit when it starts the power
+		 * ungate process. Loop till no power toggle is in progress.
+		 */
+		do {
+			val = tegra_pmc_read_32(PMC_PWRGATE_TOGGLE);
+		} while ((val & PMC_TOGGLE_START) != 0U);
+
+		/* Start the power ungate procedure */
+		val = pmc_cpu_powergate_id[cpu] | PMC_TOGGLE_START;
+		tegra_pmc_write_32(PMC_PWRGATE_TOGGLE, val);
+
+		/* Loop till the PMC deasserts the START bit again */
+		do {
+			val = tegra_pmc_read_32(PMC_PWRGATE_TOGGLE);
+		} while ((val & PMC_TOGGLE_START) != 0U);
+
+		/* loop till the CPU is power ungated */
+		do {
+			val = tegra_pmc_read_32(PMC_PWRGATE_STATUS);
+		} while ((val & (1U << pmc_cpu_powergate_id[cpu])) == 0U);
+	}
+}
+
+/*******************************************************************************
+ * Setup CPU vectors for resume from deep sleep: the low address word (with
+ * bit 0 set) goes to SCRATCH34, address bits [42:32] go to SCRATCH35.
+ ******************************************************************************/
+void tegra_pmc_cpu_setup(uint64_t reset_addr)
+{
+	const uint32_t addr_lo = (uint32_t)reset_addr | 1U;
+	const uint32_t addr_hi = (uint32_t)(reset_addr >> 32U) & 0x7FFU;
+
+	tegra_pmc_write_32(PMC_SECURE_SCRATCH34, addr_lo);
+	tegra_pmc_write_32(PMC_SECURE_SCRATCH35, addr_hi);
+}
+
+/*******************************************************************************
+ * Lock CPU vectors (secure scratch 22, 34 and 35) to restrict further writes
+ ******************************************************************************/
+void tegra_pmc_lock_cpu_vectors(void)
+{
+	uint32_t val;
+
+	/* set the write-disable bit for PMC_SECURE_SCRATCH22 */
+	val = tegra_pmc_read_32(PMC_SECURE_DISABLE2);
+	tegra_pmc_write_32(PMC_SECURE_DISABLE2,
+			   val | PMC_SECURE_DISABLE2_WRITE22_ON);
+
+	/* set the write-disable bits for PMC_SECURE_SCRATCH34/35 */
+	val = tegra_pmc_read_32(PMC_SECURE_DISABLE3);
+	tegra_pmc_write_32(PMC_SECURE_DISABLE3,
+			   val | (PMC_SECURE_DISABLE3_WRITE34_ON |
+				  PMC_SECURE_DISABLE3_WRITE35_ON));
+}
+
+/*******************************************************************************
+ * Find out if this is the last standing CPU
+ ******************************************************************************/
+bool tegra_pmc_is_last_on_cpu(void)
+{
+	const uint32_t num_ids = (uint32_t)(sizeof(pmc_cpu_powergate_id) /
+					    sizeof(pmc_cpu_powergate_id[0]));
+	const uint32_t cpu = (uint32_t)(read_mpidr() & MPIDR_CPU_MASK);
+	const uint32_t val = tegra_pmc_read_32(PMC_PWRGATE_STATUS);
+	bool status = true;
+	uint32_t i;
+
+	/* scan the other CPUs; never index past the powergate ID table */
+	for (i = 0U; (i < PLATFORM_MAX_CPUS_PER_CLUSTER) && (i < num_ids); i++) {
+		/* skip the current CPU; a set bit means power ungated */
+		if ((i != cpu) &&
+		    ((val & ((uint32_t)1 << pmc_cpu_powergate_id[i])) != 0U)) {
+			status = false;
+			break;
+		}
+	}
+
+	return status;
+}
+
+/*******************************************************************************
+ * Handler to be called on exiting System suspend.
+ *
+ * Right now it only clears the DPD sample and DPD enable registers.
+ ******************************************************************************/
+void tegra_pmc_resume(void)
+{
+	/* Clear DPD sample */
+	mmio_write_32(TEGRA_PMC_BASE + PMC_IO_DPD_SAMPLE, 0U);
+
+	/* Clear DPD Enable */
+	mmio_write_32(TEGRA_PMC_BASE + PMC_DPD_ENABLE_0, 0U);
+}
+
+/*******************************************************************************
+ * Restart the system by setting RESET_ENABLE in PMC_CONFIG
+ ******************************************************************************/
+__dead2 void tegra_pmc_system_reset(void)
+{
+	uint32_t cfg = tegra_pmc_read_32(PMC_CONFIG);
+
+	/* request the restart, then wait for it to happen */
+	tegra_pmc_write_32(PMC_CONFIG, cfg | RESET_ENABLE);
+	wfi();
+
+	/* only reached if the reset did not take effect */
+	ERROR("Tegra System Reset: operation not handled.\n");
+	panic();
+}
diff --git a/plat/nvidia/tegra/drivers/smmu/smmu.c b/plat/nvidia/tegra/drivers/smmu/smmu.c
new file mode 100644
index 0000000..4189b00
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/smmu/smmu.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2016-2018, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <platform_def.h>
+
+#include <common/bl_common.h>
+#include <common/debug.h>
+
+#include <smmu.h>
+#include <tegra_platform.h>
+#include <tegra_private.h>
+
+extern void memcpy16(void *dest, const void *src, unsigned int length);
+
+#define SMMU_NUM_CONTEXTS		64U
+#define SMMU_CONTEXT_BANK_MAX_IDX	64U
+
+#define MISMATCH_DETECTED		0x55AA55AAU
+
+/*
+ * Init SMMU during boot or "System Suspend" exit
+ */
+void tegra_smmu_init(void)
+{
+	const uint32_t actlr_base = (SMMU_GSR0_PGSIZE_64K * SMMU_NUM_CONTEXTS)
+					+ SMMU_CBn_ACTLR;
+	const uint32_t num_smmu_devices = plat_get_num_smmu_devices();
+	uint32_t smmu_id, bank, reg, val;
+
+	for (smmu_id = 0U; smmu_id < num_smmu_devices; smmu_id++) {
+		/* Program the SMMU pagesize and reset CACHE_LOCK bit */
+		val = tegra_smmu_read_32(smmu_id, SMMU_GSR0_SECURE_ACR);
+		val = (val | SMMU_GSR0_PGSIZE_64K) &
+		      (uint32_t)~SMMU_ACR_CACHE_LOCK_ENABLE_BIT;
+		tegra_smmu_write_32(smmu_id, SMMU_GSR0_SECURE_ACR, val);
+
+		/* reset CACHE LOCK bit for NS Aux. Config. Register */
+		val = tegra_smmu_read_32(smmu_id, SMMU_GNSR_ACR);
+		val &= (uint32_t)~SMMU_ACR_CACHE_LOCK_ENABLE_BIT;
+		tegra_smmu_write_32(smmu_id, SMMU_GNSR_ACR, val);
+
+		/* disable TCU prefetch for all contexts, one bank at a time */
+		reg = actlr_base;
+		for (bank = 0U; bank < SMMU_CONTEXT_BANK_MAX_IDX; bank++) {
+			val = tegra_smmu_read_32(smmu_id, reg);
+			val &= (uint32_t)~SMMU_CBn_ACTLR_CPRE_BIT;
+			tegra_smmu_write_32(smmu_id, reg, val);
+			reg += SMMU_GSR0_PGSIZE_64K;
+		}
+
+		/* set CACHE LOCK bit for NS Aux. Config. Register */
+		val = tegra_smmu_read_32(smmu_id, SMMU_GNSR_ACR);
+		val |= (uint32_t)SMMU_ACR_CACHE_LOCK_ENABLE_BIT;
+		tegra_smmu_write_32(smmu_id, SMMU_GNSR_ACR, val);
+
+		/* set CACHE LOCK bit for S Aux. Config. Register */
+		val = tegra_smmu_read_32(smmu_id, SMMU_GSR0_SECURE_ACR);
+		val |= (uint32_t)SMMU_ACR_CACHE_LOCK_ENABLE_BIT;
+		tegra_smmu_write_32(smmu_id, SMMU_GSR0_SECURE_ACR, val);
+	}
+}
+
+/*
+ * Verify SMMU settings have not been altered during boot (fatal on silicon)
+ */
+void tegra_smmu_verify(void)
+{
+	uint32_t cb_idx, ctx_base, smmu_id, val;
+	uint32_t num_smmu_devices = plat_get_num_smmu_devices();
+	uint32_t mismatch = 0U;
+
+	for (smmu_id = 0U; smmu_id < num_smmu_devices; smmu_id++) {
+		/* check PGSIZE_64K bit in S Aux. Config. Register */
+		val = tegra_smmu_read_32(smmu_id, SMMU_GSR0_SECURE_ACR);
+		if (0U == (val & SMMU_GSR0_PGSIZE_64K)) {
+			ERROR("%s: PGSIZE_64K Mismatch - smmu_id=%u, GSR0_SECURE_ACR=%x\n",
+				__func__, smmu_id, val);
+			mismatch = MISMATCH_DETECTED;
+		}
+
+		/* check CACHE LOCK bit in S Aux. Config. Register */
+		if (0U == (val & SMMU_ACR_CACHE_LOCK_ENABLE_BIT)) {
+			ERROR("%s: CACHE_LOCK Mismatch - smmu_id=%u, GSR0_SECURE_ACR=%x\n",
+				__func__, smmu_id, val);
+			mismatch = MISMATCH_DETECTED;
+		}
+
+		/* check CACHE LOCK bit in NS Aux. Config. Register */
+		val = tegra_smmu_read_32(smmu_id, SMMU_GNSR_ACR);
+		if (0U == (val & SMMU_ACR_CACHE_LOCK_ENABLE_BIT)) {
+			ERROR("%s: Mismatch - smmu_id=%u, GNSR_ACR=%x\n",
+				__func__, smmu_id, val);
+			mismatch = MISMATCH_DETECTED;
+		}
+
+		/* verify TCU prefetch for all contexts is disabled */
+		ctx_base = (SMMU_GSR0_PGSIZE_64K * SMMU_NUM_CONTEXTS) +
+			SMMU_CBn_ACTLR;
+		for (cb_idx = 0U; cb_idx < SMMU_CONTEXT_BANK_MAX_IDX; cb_idx++) {
+			val = tegra_smmu_read_32(smmu_id,
+				ctx_base + (SMMU_GSR0_PGSIZE_64K * cb_idx));
+			if (0U != (val & SMMU_CBn_ACTLR_CPRE_BIT)) {
+				ERROR("%s: Mismatch - smmu_id=%u, cb_idx=%u, CBn_ACTLR=%x\n",
+					__func__, smmu_id, cb_idx, val);
+				mismatch = MISMATCH_DETECTED;
+			}
+		}
+	}
+
+	/* Treat configuration mismatch as fatal, but only on silicon */
+	if ((mismatch == MISMATCH_DETECTED) && tegra_platform_is_silicon()) {
+		panic();
+	}
+}
diff --git a/plat/nvidia/tegra/drivers/spe/shared_console.S b/plat/nvidia/tegra/drivers/spe/shared_console.S
new file mode 100644
index 0000000..5ad4eb8
--- /dev/null
+++ b/plat/nvidia/tegra/drivers/spe/shared_console.S
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) 2017-2020, ARM Limited and Contributors. All rights reserved.
+ * Copyright (c) 2020, NVIDIA Corporation. All rights reserved.
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ */
+#include <asm_macros.S>
+#include <assert_macros.S>
+#include <console_macros.S>
+
+#define CONSOLE_NUM_BYTES_SHIFT		24
+#define CONSOLE_FLUSH_DATA_TO_PORT	(1 << 26)
+#define CONSOLE_RING_DOORBELL		(1 << 31)
+#define CONSOLE_IS_BUSY			(1 << 31)
+#define CONSOLE_TIMEOUT			0xC000		/* 50 ms */
+
+	/*
+	 * This file contains a driver implementation to make use of the
+	 * real console implementation provided by the SPE firmware running on
+	 * SoCs after Tegra186.
+	 *
+	 * This console is shared by multiple components and the SPE firmware
+	 * finally displays everything on the UART port.
+	 */
+
+	.globl	console_spe_core_init
+	.globl	console_spe_core_putc
+	.globl	console_spe_core_getc
+	.globl	console_spe_core_flush
+	.globl	console_spe_putc
+	.globl	console_spe_getc
+	.globl	console_spe_flush
+	.globl	console_spe_register
+
+.macro	check_if_console_is_ready base, tmp1, tmp2, label
+	/* poll [base] until BUSY clears or timeout (then branch to label) */
+	mrs	\tmp2, cntps_tval_el1		/* timer value at start */
+1:	ldr	\tmp1, [\base]			/* NOTE(review): 64-bit load if tmp1 is an x reg */
+	and	\tmp1, \tmp1, #CONSOLE_IS_BUSY
+	cbz	\tmp1, 2f			/* not busy: done */
+	mrs	\tmp1, cntps_tval_el1
+	sub	\tmp1, \tmp2, \tmp1		/* elapsed = start - now */
+	cmp	\tmp1, #CONSOLE_TIMEOUT
+	b.lt	1b				/* keep polling */
+	b	\label				/* timed out */
+2:
+.endm
+
+	/* -------------------------------------------------
+	 * int console_spe_register(uintptr_t baseaddr,
+	 *     uint32_t clock, uint32_t baud,
+	 *     console_t *console);
+	 * Function to initialize and register a new spe
+	 * console. Storage passed in for the console struct
+	 * *must* be persistent (i.e. not from the stack).
+	 * In: x0 - UART register base address
+	 *     w1 - UART clock in Hz (unused)
+	 *     w2 - Baud rate (unused)
+	 *     x3 - pointer to empty console_t struct
+	 * Out: return 1 on success, 0 on error (NULL arg or SPE busy)
+	 * Clobber list : x0, x1, x2, x6, x7, x14
+	 * -------------------------------------------------
+	 */
+func console_spe_register
+	/* Check the input base address */
+	cbz	x0, register_fail
+
+	/* clock and baud rate are unused, so x1/x2 serve as scratch here */
+	check_if_console_is_ready x0, x1, x2, register_fail
+
+	cbz	x3, register_fail		/* reject NULL console pointer */
+	str	x0, [x3, #CONSOLE_T_BASE]	/* store the base in the console */
+	mov	x0, x3
+	finish_console_register spe putc=1, getc=ENABLE_CONSOLE_GETC, flush=1
+
+register_fail:
+	mov	w0, wzr				/* return 0 */
+	ret
+endfunc console_spe_register
+
+	/* --------------------------------------------------------
+	 * int console_spe_core_putc(int c, uintptr_t base_addr)
+	 * Function to output a character over the console; '\n' is
+	 * sent as '\r' '\n'. Returns the character or -1 on error.
+	 * In : w0 - character to be printed
+	 *      x1 - console base address
+	 * Out : return -1 on error (NULL base, busy timeout) else character.
+	 * Clobber list : x2, x3
+	 * --------------------------------------------------------
+	 */
+func console_spe_core_putc
+	/* Check the input parameter */
+	cbz	x1, putc_error
+
+	/* Prepend '\r' to '\n' */
+	cmp	w0, #0xA
+	b.ne	not_eol
+
+	check_if_console_is_ready x1, x2, x3, putc_error
+
+	/* spe is ready: word = char | one byte at bit 24 | doorbell */
+	mov	w2, #0xD		/* '\r' */
+	and	w2, w2, #0xFF
+	mov	w3, #(CONSOLE_RING_DOORBELL | (1 << CONSOLE_NUM_BYTES_SHIFT))
+	orr	w2, w2, w3
+	str	w2, [x1]
+
+not_eol:
+	check_if_console_is_ready x1, x2, x3, putc_error
+
+	/* spe is ready: word = char | one byte at bit 24 | doorbell */
+	mov	w2, w0
+	and	w2, w2, #0xFF		/* keep only the low byte of c */
+	mov	w3, #(CONSOLE_RING_DOORBELL | (1 << CONSOLE_NUM_BYTES_SHIFT))
+	orr	w2, w2, w3
+	str	w2, [x1]
+
+	ret				/* w0 still holds the character */
+putc_error:
+	mov	w0, #-1
+	ret
+endfunc console_spe_core_putc
+
+	/* --------------------------------------------------------
+	 * int console_spe_putc(int c, console_t *console)
+	 * Function to output a character over the console. It
+	 * returns the character printed on success or -1 on error.
+	 * In : w0 - character to be printed
+	 *      x1 - pointer to console_t structure
+	 * Out : return -1 on error else return character.
+	 * Clobber list : x1, x2, x3
+	 * --------------------------------------------------------
+	 */
+func console_spe_putc
+	ldr	x1, [x1, #CONSOLE_T_BASE]	/* x1 = console base address */
+	b	console_spe_core_putc		/* tail call, returns to caller */
+endfunc console_spe_putc
+
+	/* ---------------------------------------------
+	 * int console_spe_getc(console_t *console)
+	 * Function to get a character from the console.
+	 * This implementation never reads a character
+	 * and always returns -1.
+	 * In : x0 - pointer to console_t structure
+	 * Out: w0 - always -1
+	 * Clobber list : x0
+	 * ---------------------------------------------
+	 */
+func console_spe_getc
+	mov	w0, #-1
+	ret
+endfunc console_spe_getc
+
+	/* -------------------------------------------------
+	 * void console_spe_core_flush(uintptr_t base_addr)
+	 * Function to force a write of all buffered
+	 * data that hasn't been output.
+	 * In : x0 - console base address
+	 * Out : void.
+	 * Clobber list : x1
+	 * -------------------------------------------------
+	 */
+func console_spe_core_flush
+#if ENABLE_ASSERTIONS
+	cmp	x0, #0
+	ASM_ASSERT(ne)
+#endif /* ENABLE_ASSERTIONS */
+
+	/* flush console; note: no busy check is done before this write */
+	mov	w1, #(CONSOLE_RING_DOORBELL | CONSOLE_FLUSH_DATA_TO_PORT)
+	str	w1, [x0]
+	ret
+endfunc console_spe_core_flush
+
+	/* ---------------------------------------------
+	 * void console_spe_flush(console_t *console)
+	 * Function to force a write of all buffered
+	 * data that hasn't been output.
+	 * In : x0 - pointer to console_t structure
+	 * Out : void.
+	 * Clobber list : x0, x1
+	 * ---------------------------------------------
+	 */
+func console_spe_flush
+	ldr	x0, [x0, #CONSOLE_T_BASE]	/* x0 = console base address */
+	b	console_spe_core_flush		/* tail call, returns to caller */
+endfunc console_spe_flush