Adding upstream version 6.6.15.upstream/6.6.15

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-11 08:27:49 +0000
commit: ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree: b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/gpu/drm/i915/gvt
parent: Initial commit. (diff)
download: linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz
linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip
40 files changed, 24045 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile
new file mode 100644
index 0000000000..1699f64429
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+
+kvmgt-$(CONFIG_DRM_I915_GVT) += \
+	gvt/aperture_gm.o \
+	gvt/cfg_space.o \
+	gvt/cmd_parser.o \
+	gvt/debugfs.o \
+	gvt/display.o \
+	gvt/dmabuf.o \
+	gvt/edid.o \
+	gvt/execlist.o \
+	gvt/fb_decoder.o \
+	gvt/firmware.o \
+	gvt/gtt.o \
+	gvt/handlers.o \
+	gvt/interrupt.o \
+	gvt/kvmgt.o \
+	gvt/mmio.o \
+	gvt/mmio_context.o \
+	gvt/opregion.o \
+	gvt/page_track.o \
+	gvt/sched_policy.o \
+	gvt/scheduler.o \
+	gvt/trace_points.o \
+	gvt/vgpu.o
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
new file mode 100644
index 0000000000..eedd1865bb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Dexuan Cui
+ *
+ * Contributors:
+ *    Pei Zhang <pei.zhang@intel.com>
+ *    Min He <min.he@intel.com>
+ *    Niu Bing <bing.niu@intel.com>
+ *    Yulei Zhang <yulei.zhang@intel.com>
+ *    Zhenyu Wang <zhenyuw@linux.intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gt/intel_ggtt_fencing.h"
+#include "gvt.h"
+
+static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gt *gt = gvt->gt;
+	unsigned int flags;
+	u64 start, end, size;
+	struct drm_mm_node *node;
+	int ret;
+
+	if (high_gm) {
+		node = &vgpu->gm.high_gm_node;
+		size = vgpu_hidden_sz(vgpu);
+		start = ALIGN(gvt_hidden_gmadr_base(gvt), I915_GTT_PAGE_SIZE);
+		end = ALIGN(gvt_hidden_gmadr_end(gvt), I915_GTT_PAGE_SIZE);
+		flags = PIN_HIGH;
+	} else {
+		node = &vgpu->gm.low_gm_node;
+		size = vgpu_aperture_sz(vgpu);
+		start = ALIGN(gvt_aperture_gmadr_base(gvt), I915_GTT_PAGE_SIZE);
+		end = ALIGN(gvt_aperture_gmadr_end(gvt), I915_GTT_PAGE_SIZE);
+		flags = PIN_MAPPABLE;
+	}
+
+	mutex_lock(&gt->ggtt->vm.mutex);
+	mmio_hw_access_pre(gt);
+	ret = i915_gem_gtt_insert(&gt->ggtt->vm, NULL, node,
+				  size, I915_GTT_PAGE_SIZE,
+				  I915_COLOR_UNEVICTABLE,
+				  start, end, flags);
+	mmio_hw_access_post(gt);
+	mutex_unlock(&gt->ggtt->vm.mutex);
+	if (ret)
+		gvt_err("fail to alloc %s gm space from host\n",
+			high_gm ? "high" : "low");
+
+	return ret;
+}
+
+static int alloc_vgpu_gm(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gt *gt = gvt->gt;
+	int ret;
+
+	ret = alloc_gm(vgpu, false);
+	if (ret)
+		return ret;
+
+	ret = alloc_gm(vgpu, true);
+	if (ret)
+		goto out_free_aperture;
+
+	gvt_dbg_core("vgpu%d: alloc low GM start %llx size %llx\n", vgpu->id,
+		     vgpu_aperture_offset(vgpu), vgpu_aperture_sz(vgpu));
+
+	gvt_dbg_core("vgpu%d: alloc high GM start %llx size %llx\n", vgpu->id,
+		     vgpu_hidden_offset(vgpu), vgpu_hidden_sz(vgpu));
+
+	return 0;
+out_free_aperture:
+	mutex_lock(&gt->ggtt->vm.mutex);
+	drm_mm_remove_node(&vgpu->gm.low_gm_node);
+	mutex_unlock(&gt->ggtt->vm.mutex);
+	return ret;
+}
+
+static void free_vgpu_gm(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gt *gt = gvt->gt;
+
+	mutex_lock(&gt->ggtt->vm.mutex);
+	drm_mm_remove_node(&vgpu->gm.low_gm_node);
+	drm_mm_remove_node(&vgpu->gm.high_gm_node);
+	mutex_unlock(&gt->ggtt->vm.mutex);
+}
+
+/**
+ * intel_vgpu_write_fence - write fence registers owned by a vGPU
+ * @vgpu: vGPU instance
+ * @fence: vGPU fence register number
+ * @value: Fence register value to be written
+ *
+ * This function is used to write fence registers owned by a vGPU. The vGPU
+ * fence register number will be translated into HW fence register number.
+ *
+ */
+void intel_vgpu_write_fence(struct intel_vgpu *vgpu,
+		u32 fence, u64 value)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *i915 = gvt->gt->i915;
+	struct intel_uncore *uncore = gvt->gt->uncore;
+	struct i915_fence_reg *reg;
+	i915_reg_t fence_reg_lo, fence_reg_hi;
+
+	assert_rpm_wakelock_held(uncore->rpm);
+
+	if (drm_WARN_ON(&i915->drm, fence >= vgpu_fence_sz(vgpu)))
+		return;
+
+	reg = vgpu->fence.regs[fence];
+	if (drm_WARN_ON(&i915->drm, !reg))
+		return;
+
+	fence_reg_lo = FENCE_REG_GEN6_LO(reg->id);
+	fence_reg_hi = FENCE_REG_GEN6_HI(reg->id);
+
+	intel_uncore_write(uncore, fence_reg_lo, 0);
+	intel_uncore_posting_read(uncore, fence_reg_lo);
+
+	intel_uncore_write(uncore, fence_reg_hi, upper_32_bits(value));
+	intel_uncore_write(uncore, fence_reg_lo, lower_32_bits(value));
+	intel_uncore_posting_read(uncore, fence_reg_lo);
+}
+
+static void _clear_vgpu_fence(struct intel_vgpu *vgpu)
+{
+	int i;
+
+	for (i = 0; i < vgpu_fence_sz(vgpu); i++)
+		intel_vgpu_write_fence(vgpu, i, 0);
+}
+
+static void free_vgpu_fence(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_uncore *uncore = gvt->gt->uncore;
+	struct i915_fence_reg *reg;
+	intel_wakeref_t wakeref;
+	u32 i;
+
+	if (drm_WARN_ON(&gvt->gt->i915->drm, !vgpu_fence_sz(vgpu)))
+		return;
+
+	wakeref = intel_runtime_pm_get(uncore->rpm);
+
+	mutex_lock(&gvt->gt->ggtt->vm.mutex);
+	_clear_vgpu_fence(vgpu);
+	for (i = 0; i < vgpu_fence_sz(vgpu); i++) {
+		reg = vgpu->fence.regs[i];
+		i915_unreserve_fence(reg);
+		vgpu->fence.regs[i] = NULL;
+	}
+	mutex_unlock(&gvt->gt->ggtt->vm.mutex);
+
+	intel_runtime_pm_put(uncore->rpm, wakeref);
+}
+
+static int alloc_vgpu_fence(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_uncore *uncore = gvt->gt->uncore;
+	struct i915_fence_reg *reg;
+	intel_wakeref_t wakeref;
+	int i;
+
+	wakeref = intel_runtime_pm_get(uncore->rpm);
+
+	/* Request fences from host */
+	mutex_lock(&gvt->gt->ggtt->vm.mutex);
+
+	for (i = 0; i < vgpu_fence_sz(vgpu); i++) {
+		reg = i915_reserve_fence(gvt->gt->ggtt);
+		if (IS_ERR(reg))
+			goto out_free_fence;
+
+		vgpu->fence.regs[i] = reg;
+	}
+
+	_clear_vgpu_fence(vgpu);
+
+	mutex_unlock(&gvt->gt->ggtt->vm.mutex);
+	intel_runtime_pm_put(uncore->rpm, wakeref);
+	return 0;
+
+out_free_fence:
+	gvt_vgpu_err("Failed to alloc fences\n");
+	/* Return fences to host, if fail */
+	for (i = 0; i < vgpu_fence_sz(vgpu); i++) {
+		reg = vgpu->fence.regs[i];
+		if (!reg)
+			continue;
+		i915_unreserve_fence(reg);
+		vgpu->fence.regs[i] = NULL;
+	}
+	mutex_unlock(&gvt->gt->ggtt->vm.mutex);
+	intel_runtime_pm_put_unchecked(uncore->rpm);
+	return -ENOSPC;
+}
+
+static void free_resource(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+
+	gvt->gm.vgpu_allocated_low_gm_size -= vgpu_aperture_sz(vgpu);
+	gvt->gm.vgpu_allocated_high_gm_size -= vgpu_hidden_sz(vgpu);
+	gvt->fence.vgpu_allocated_fence_num -= vgpu_fence_sz(vgpu);
+}
+
+static int alloc_resource(struct intel_vgpu *vgpu,
+		const struct intel_vgpu_config *conf)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	unsigned long request, avail, max, taken;
+	const char *item;
+
+	if (!conf->low_mm || !conf->high_mm || !conf->fence) {
+		gvt_vgpu_err("Invalid vGPU creation params\n");
+		return -EINVAL;
+	}
+
+	item = "low GM space";
+	max = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
+	taken = gvt->gm.vgpu_allocated_low_gm_size;
+	avail = max - taken;
+	request = conf->low_mm;
+
+	if (request > avail)
+		goto no_enough_resource;
+
+	vgpu_aperture_sz(vgpu) = ALIGN(request, I915_GTT_PAGE_SIZE);
+
+	item = "high GM space";
+	max = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
+	taken = gvt->gm.vgpu_allocated_high_gm_size;
+	avail = max - taken;
+	request = conf->high_mm;
+
+	if (request > avail)
+		goto no_enough_resource;
+
+	vgpu_hidden_sz(vgpu) = ALIGN(request, I915_GTT_PAGE_SIZE);
+
+	item = "fence";
+	max = gvt_fence_sz(gvt) - HOST_FENCE;
+	taken = gvt->fence.vgpu_allocated_fence_num;
+	avail = max - taken;
+	request = conf->fence;
+
+	if (request > avail)
+		goto no_enough_resource;
+
+	vgpu_fence_sz(vgpu) = request;
+
+	gvt->gm.vgpu_allocated_low_gm_size += conf->low_mm;
+	gvt->gm.vgpu_allocated_high_gm_size += conf->high_mm;
+	gvt->fence.vgpu_allocated_fence_num += conf->fence;
+	return 0;
+
+no_enough_resource:
+	gvt_err("fail to allocate resource %s\n", item);
+	gvt_err("request %luMB avail %luMB max %luMB taken %luMB\n",
+		BYTES_TO_MB(request), BYTES_TO_MB(avail),
+		BYTES_TO_MB(max), BYTES_TO_MB(taken));
+	return -ENOSPC;
+}
+
+/**
+ * intel_vgpu_free_resource() - free HW resource owned by a vGPU
+ * @vgpu: a vGPU
+ *
+ * This function is used to free the HW resource owned by a vGPU.
+ *
+ */
+void intel_vgpu_free_resource(struct intel_vgpu *vgpu)
+{
+	free_vgpu_gm(vgpu);
+	free_vgpu_fence(vgpu);
+	free_resource(vgpu);
+}
+
+/**
+ * intel_vgpu_reset_resource - reset resource state owned by a vGPU
+ * @vgpu: a vGPU
+ *
+ * This function is used to reset resource state owned by a vGPU.
+ *
+ */
+void intel_vgpu_reset_resource(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	intel_wakeref_t wakeref;
+
+	with_intel_runtime_pm(gvt->gt->uncore->rpm, wakeref)
+		_clear_vgpu_fence(vgpu);
+}
+
+/**
+ * intel_vgpu_alloc_resource() - allocate HW resource for a vGPU
+ * @vgpu: vGPU
+ * @conf: vGPU creation params
+ *
+ * This function is used to allocate HW resource for a vGPU. User specifies
+ * the resource configuration through the creation params.
+ *
+ * Returns:
+ * zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu,
+		const struct intel_vgpu_config *conf)
+{
+	int ret;
+
+	ret = alloc_resource(vgpu, conf);
+	if (ret)
+		return ret;
+
+	ret = alloc_vgpu_gm(vgpu);
+	if (ret)
+		goto out_free_resource;
+
+	ret = alloc_vgpu_fence(vgpu);
+	if (ret)
+		goto out_free_vgpu_gm;
+
+	return 0;
+
+out_free_vgpu_gm:
+	free_vgpu_gm(vgpu);
+out_free_resource:
+	free_resource(vgpu);
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
new file mode 100644
index 0000000000..9bafac1eaf
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -0,0 +1,400 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eddie Dong <eddie.dong@intel.com>
+ *    Jike Song <jike.song@intel.com>
+ *
+ * Contributors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "gvt.h"
+#include "intel_pci_config.h"
+
+enum {
+	INTEL_GVT_PCI_BAR_GTTMMIO = 0,
+	INTEL_GVT_PCI_BAR_APERTURE,
+	INTEL_GVT_PCI_BAR_PIO,
+	INTEL_GVT_PCI_BAR_MAX,
+};
+
+/* bitmap for writable bits (RW or RW1C bits, but cannot co-exist in one
+ * byte) byte by byte in standard pci configuration space. (not the full
+ * 256 bytes.)
+ */
+static const u8 pci_cfg_space_rw_bmp[PCI_INTERRUPT_LINE + 4] = {
+	[PCI_COMMAND]		= 0xff, 0x07,
+	[PCI_STATUS]		= 0x00, 0xf9, /* the only one RW1C byte */
+	[PCI_CACHE_LINE_SIZE]	= 0xff,
+	[PCI_BASE_ADDRESS_0 ... PCI_CARDBUS_CIS - 1] = 0xff,
+	[PCI_ROM_ADDRESS]	= 0x01, 0xf8, 0xff, 0xff,
+	[PCI_INTERRUPT_LINE]	= 0xff,
+};
+
+/**
+ * vgpu_pci_cfg_mem_write - write virtual cfg space memory
+ * @vgpu: target vgpu
+ * @off: offset
+ * @src: src ptr to write
+ * @bytes: number of bytes
+ *
+ * Use this function to write virtual cfg space memory.
+ * For standard cfg space, only RW bits can be changed,
+ * and we emulates the RW1C behavior of PCI_STATUS register.
+ */
+static void vgpu_pci_cfg_mem_write(struct intel_vgpu *vgpu, unsigned int off,
+				   u8 *src, unsigned int bytes)
+{
+	u8 *cfg_base = vgpu_cfg_space(vgpu);
+	u8 mask, new, old;
+	pci_power_t pwr;
+	int i = 0;
+
+	for (; i < bytes && (off + i < sizeof(pci_cfg_space_rw_bmp)); i++) {
+		mask = pci_cfg_space_rw_bmp[off + i];
+		old = cfg_base[off + i];
+		new = src[i] & mask;
+
+		/**
+		 * The PCI_STATUS high byte has RW1C bits, here
+		 * emulates clear by writing 1 for these bits.
+		 * Writing a 0b to RW1C bits has no effect.
+		 */
+		if (off + i == PCI_STATUS + 1)
+			new = (~new & old) & mask;
+
+		cfg_base[off + i] = (old & ~mask) | new;
+	}
+
+	/* For other configuration space directly copy as it is. */
+	if (i < bytes)
+		memcpy(cfg_base + off + i, src + i, bytes - i);
+
+	if (off == vgpu->cfg_space.pmcsr_off && vgpu->cfg_space.pmcsr_off) {
+		pwr = (pci_power_t __force)(*(u16*)(&vgpu_cfg_space(vgpu)[off])
+			& PCI_PM_CTRL_STATE_MASK);
+		if (pwr == PCI_D3hot)
+			vgpu->d3_entered = true;
+		gvt_dbg_core("vgpu-%d power status changed to %d\n",
+			     vgpu->id, pwr);
+	}
+}
+
+/**
+ * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space read
+ * @vgpu: target vgpu
+ * @offset: offset
+ * @p_data: return data ptr
+ * @bytes: number of bytes to read
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+
+	if (drm_WARN_ON(&i915->drm, bytes > 4))
+		return -EINVAL;
+
+	if (drm_WARN_ON(&i915->drm,
+			offset + bytes > vgpu->gvt->device_info.cfg_space_size))
+		return -EINVAL;
+
+	memcpy(p_data, vgpu_cfg_space(vgpu) + offset, bytes);
+	return 0;
+}
+
+static void map_aperture(struct intel_vgpu *vgpu, bool map)
+{
+	if (map != vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked)
+		vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].tracked = map;
+}
+
+static void trap_gttmmio(struct intel_vgpu *vgpu, bool trap)
+{
+	if (trap != vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].tracked)
+		vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].tracked = trap;
+}
+
+static int emulate_pci_command_write(struct intel_vgpu *vgpu,
+	unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u8 old = vgpu_cfg_space(vgpu)[offset];
+	u8 new = *(u8 *)p_data;
+	u8 changed = old ^ new;
+
+	vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
+	if (!(changed & PCI_COMMAND_MEMORY))
+		return 0;
+
+	if (old & PCI_COMMAND_MEMORY) {
+		trap_gttmmio(vgpu, false);
+		map_aperture(vgpu, false);
+	} else {
+		trap_gttmmio(vgpu, true);
+		map_aperture(vgpu, true);
+	}
+
+	return 0;
+}
+
+static int emulate_pci_rom_bar_write(struct intel_vgpu *vgpu,
+	unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 *pval = (u32 *)(vgpu_cfg_space(vgpu) + offset);
+	u32 new = *(u32 *)(p_data);
+
+	if ((new & PCI_ROM_ADDRESS_MASK) == PCI_ROM_ADDRESS_MASK)
+		/* We don't have rom, return size of 0. */
+		*pval = 0;
+	else
+		vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
+static void emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	u32 new = *(u32 *)(p_data);
+	bool lo = IS_ALIGNED(offset, 8);
+	u64 size;
+	bool mmio_enabled =
+		vgpu_cfg_space(vgpu)[PCI_COMMAND] & PCI_COMMAND_MEMORY;
+	struct intel_vgpu_pci_bar *bars = vgpu->cfg_space.bar;
+
+	/*
+	 * Power-up software can determine how much address
+	 * space the device requires by writing a value of
+	 * all 1's to the register and then reading the value
+	 * back. The device will return 0's in all don't-care
+	 * address bits.
+	 */
+	if (new == 0xffffffff) {
+		switch (offset) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+			size = ~(bars[INTEL_GVT_PCI_BAR_GTTMMIO].size -1);
+			intel_vgpu_write_pci_bar(vgpu, offset,
+						size >> (lo ? 0 : 32), lo);
+			/*
+			 * Untrap the BAR, since guest hasn't configured a
+			 * valid GPA
+			 */
+			trap_gttmmio(vgpu, false);
+			break;
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+			size = ~(bars[INTEL_GVT_PCI_BAR_APERTURE].size -1);
+			intel_vgpu_write_pci_bar(vgpu, offset,
+						size >> (lo ? 0 : 32), lo);
+			map_aperture(vgpu, false);
+			break;
+		default:
+			/* Unimplemented BARs */
+			intel_vgpu_write_pci_bar(vgpu, offset, 0x0, false);
+		}
+	} else {
+		switch (offset) {
+		case PCI_BASE_ADDRESS_0:
+		case PCI_BASE_ADDRESS_1:
+			/*
+			 * Untrap the old BAR first, since guest has
+			 * re-configured the BAR
+			 */
+			trap_gttmmio(vgpu, false);
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+			trap_gttmmio(vgpu, mmio_enabled);
+			break;
+		case PCI_BASE_ADDRESS_2:
+		case PCI_BASE_ADDRESS_3:
+			map_aperture(vgpu, false);
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+			map_aperture(vgpu, mmio_enabled);
+			break;
+		default:
+			intel_vgpu_write_pci_bar(vgpu, offset, new, lo);
+		}
+	}
+}
+
+/**
+ * intel_vgpu_emulate_cfg_write - emulate vGPU configuration space write
+ * @vgpu: target vgpu
+ * @offset: offset
+ * @p_data: write data ptr
+ * @bytes: number of bytes to write
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	int ret;
+
+	if (drm_WARN_ON(&i915->drm, bytes > 4))
+		return -EINVAL;
+
+	if (drm_WARN_ON(&i915->drm,
+			offset + bytes > vgpu->gvt->device_info.cfg_space_size))
+		return -EINVAL;
+
+	/* First check if it's PCI_COMMAND */
+	if (IS_ALIGNED(offset, 2) && offset == PCI_COMMAND) {
+		if (drm_WARN_ON(&i915->drm, bytes > 2))
+			return -EINVAL;
+		return emulate_pci_command_write(vgpu, offset, p_data, bytes);
+	}
+
+	switch (rounddown(offset, 4)) {
+	case PCI_ROM_ADDRESS:
+		if (drm_WARN_ON(&i915->drm, !IS_ALIGNED(offset, 4)))
+			return -EINVAL;
+		return emulate_pci_rom_bar_write(vgpu, offset, p_data, bytes);
+
+	case PCI_BASE_ADDRESS_0 ... PCI_BASE_ADDRESS_5:
+		if (drm_WARN_ON(&i915->drm, !IS_ALIGNED(offset, 4)))
+			return -EINVAL;
+		emulate_pci_bar_write(vgpu, offset, p_data, bytes);
+		break;
+	case INTEL_GVT_PCI_SWSCI:
+		if (drm_WARN_ON(&i915->drm, !IS_ALIGNED(offset, 4)))
+			return -EINVAL;
+		ret = intel_vgpu_emulate_opregion_request(vgpu, *(u32 *)p_data);
+		if (ret)
+			return ret;
+		break;
+
+	case INTEL_GVT_PCI_OPREGION:
+		if (drm_WARN_ON(&i915->drm, !IS_ALIGNED(offset, 4)))
+			return -EINVAL;
+		ret = intel_vgpu_opregion_base_write_handler(vgpu,
+						   *(u32 *)p_data);
+		if (ret)
+			return ret;
+
+		vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
+		break;
+	default:
+		vgpu_pci_cfg_mem_write(vgpu, offset, p_data, bytes);
+		break;
+	}
+	return 0;
+}
+
+/**
+ * intel_vgpu_init_cfg_space - init vGPU configuration space when create vGPU
+ *
+ * @vgpu: a vGPU
+ * @primary: is the vGPU presented as primary
+ *
+ */
+void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
+			       bool primary)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
+	const struct intel_gvt_device_info *info = &gvt->device_info;
+	u16 *gmch_ctl;
+	u8 next;
+
+	memcpy(vgpu_cfg_space(vgpu), gvt->firmware.cfg_space,
+	       info->cfg_space_size);
+
+	if (!primary) {
+		vgpu_cfg_space(vgpu)[PCI_CLASS_DEVICE] =
+			INTEL_GVT_PCI_CLASS_VGA_OTHER;
+		vgpu_cfg_space(vgpu)[PCI_CLASS_PROG] =
+			INTEL_GVT_PCI_CLASS_VGA_OTHER;
+	}
+
+	/* Show guest that there isn't any stolen memory.*/
+	gmch_ctl = (u16 *)(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_GMCH_CONTROL);
+	*gmch_ctl &= ~(BDW_GMCH_GMS_MASK << BDW_GMCH_GMS_SHIFT);
+
+	intel_vgpu_write_pci_bar(vgpu, PCI_BASE_ADDRESS_2,
+				 gvt_aperture_pa_base(gvt), true);
+
+	vgpu_cfg_space(vgpu)[PCI_COMMAND] &= ~(PCI_COMMAND_IO
+					     | PCI_COMMAND_MEMORY
+					     | PCI_COMMAND_MASTER);
+	/*
+	 * Clear the bar upper 32bit and let guest to assign the new value
+	 */
+	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_1, 0, 4);
+	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_3, 0, 4);
+	memset(vgpu_cfg_space(vgpu) + PCI_BASE_ADDRESS_4, 0, 8);
+	memset(vgpu_cfg_space(vgpu) + INTEL_GVT_PCI_OPREGION, 0, 4);
+
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_GTTMMIO].size =
+		pci_resource_len(pdev, GEN4_GTTMMADR_BAR);
+	vgpu->cfg_space.bar[INTEL_GVT_PCI_BAR_APERTURE].size =
+		pci_resource_len(pdev, GEN4_GMADR_BAR);
+
+	memset(vgpu_cfg_space(vgpu) + PCI_ROM_ADDRESS, 0, 4);
+
+	/* PM Support */
+	vgpu->cfg_space.pmcsr_off = 0;
+	if (vgpu_cfg_space(vgpu)[PCI_STATUS] & PCI_STATUS_CAP_LIST) {
+		next = vgpu_cfg_space(vgpu)[PCI_CAPABILITY_LIST];
+		do {
+			if (vgpu_cfg_space(vgpu)[next + PCI_CAP_LIST_ID] == PCI_CAP_ID_PM) {
+				vgpu->cfg_space.pmcsr_off = next + PCI_PM_CTRL;
+				break;
+			}
+			next = vgpu_cfg_space(vgpu)[next + PCI_CAP_LIST_NEXT];
+		} while (next);
+	}
+}
+
+/**
+ * intel_vgpu_reset_cfg_space - reset vGPU configuration space
+ *
+ * @vgpu: a vGPU
+ *
+ */
+void intel_vgpu_reset_cfg_space(struct intel_vgpu *vgpu)
+{
+	u8 cmd = vgpu_cfg_space(vgpu)[PCI_COMMAND];
+	bool primary = vgpu_cfg_space(vgpu)[PCI_CLASS_DEVICE] !=
+				INTEL_GVT_PCI_CLASS_VGA_OTHER;
+
+	if (cmd & PCI_COMMAND_MEMORY) {
+		trap_gttmmio(vgpu, false);
+		map_aperture(vgpu, false);
+	}
+
+	/**
+	 * Currently we only do such reset when vGPU is not
+	 * owned by any VM, so we simply restore entire cfg
+	 * space to default value.
+	 */
+	intel_vgpu_init_cfg_space(vgpu, primary);
+}
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
new file mode 100644
index 0000000000..05f9348b7a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -0,0 +1,3263 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Ke Yu
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Yulei Zhang <yulei.zhang@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#include <linux/slab.h>
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc.h"
+#include "gt/intel_ring.h"
+#include "gt/intel_gt_requests.h"
+#include "gt/shmem_utils.h"
+#include "gvt.h"
+#include "i915_pvinfo.h"
+#include "trace.h"
+
+#include "display/intel_display.h"
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+
+#define INVALID_OP    (~0U)
+
+#define OP_LEN_MI           9
+#define OP_LEN_2D           10
+#define OP_LEN_3D_MEDIA     16
+#define OP_LEN_MFX_VC       16
+#define OP_LEN_VEBOX	    16
+
+#define CMD_TYPE(cmd)	(((cmd) >> 29) & 7)
+
+struct sub_op_bits {
+	int hi;
+	int low;
+};
+struct decode_info {
+	const char *name;
+	int op_len;
+	int nr_sub_op;
+	const struct sub_op_bits *sub_op;
+};
+
+#define   MAX_CMD_BUDGET			0x7fffffff
+#define   MI_WAIT_FOR_PLANE_C_FLIP_PENDING      (1<<15)
+#define   MI_WAIT_FOR_PLANE_B_FLIP_PENDING      (1<<9)
+#define   MI_WAIT_FOR_PLANE_A_FLIP_PENDING      (1<<1)
+
+#define   MI_WAIT_FOR_SPRITE_C_FLIP_PENDING      (1<<20)
+#define   MI_WAIT_FOR_SPRITE_B_FLIP_PENDING      (1<<10)
+#define   MI_WAIT_FOR_SPRITE_A_FLIP_PENDING      (1<<2)
+
+/* Render Command Map */
+
+/* MI_* command Opcode (28:23) */
+#define OP_MI_NOOP                          0x0
+#define OP_MI_SET_PREDICATE                 0x1  /* HSW+ */
+#define OP_MI_USER_INTERRUPT                0x2
+#define OP_MI_WAIT_FOR_EVENT                0x3
+#define OP_MI_FLUSH                         0x4
+#define OP_MI_ARB_CHECK                     0x5
+#define OP_MI_RS_CONTROL                    0x6  /* HSW+ */
+#define OP_MI_REPORT_HEAD                   0x7
+#define OP_MI_ARB_ON_OFF                    0x8
+#define OP_MI_URB_ATOMIC_ALLOC              0x9  /* HSW+ */
+#define OP_MI_BATCH_BUFFER_END              0xA
+#define OP_MI_SUSPEND_FLUSH                 0xB
+#define OP_MI_PREDICATE                     0xC  /* IVB+ */
+#define OP_MI_TOPOLOGY_FILTER               0xD  /* IVB+ */
+#define OP_MI_SET_APPID                     0xE  /* IVB+ */
+#define OP_MI_RS_CONTEXT                    0xF  /* HSW+ */
+#define OP_MI_LOAD_SCAN_LINES_INCL          0x12 /* HSW+ */
+#define OP_MI_DISPLAY_FLIP                  0x14
+#define OP_MI_SEMAPHORE_MBOX                0x16
+#define OP_MI_SET_CONTEXT                   0x18
+#define OP_MI_MATH                          0x1A
+#define OP_MI_URB_CLEAR                     0x19
+#define OP_MI_SEMAPHORE_SIGNAL		    0x1B  /* BDW+ */
+#define OP_MI_SEMAPHORE_WAIT		    0x1C  /* BDW+ */
+
+#define OP_MI_STORE_DATA_IMM                0x20
+#define OP_MI_STORE_DATA_INDEX              0x21
+#define OP_MI_LOAD_REGISTER_IMM             0x22
+#define OP_MI_UPDATE_GTT                    0x23
+#define OP_MI_STORE_REGISTER_MEM            0x24
+#define OP_MI_FLUSH_DW                      0x26
+#define OP_MI_CLFLUSH                       0x27
+#define OP_MI_REPORT_PERF_COUNT             0x28
+#define OP_MI_LOAD_REGISTER_MEM             0x29  /* HSW+ */
+#define OP_MI_LOAD_REGISTER_REG             0x2A  /* HSW+ */
+#define OP_MI_RS_STORE_DATA_IMM             0x2B  /* HSW+ */
+#define OP_MI_LOAD_URB_MEM                  0x2C  /* HSW+ */
+#define OP_MI_STORE_URM_MEM                 0x2D  /* HSW+ */
+#define OP_MI_2E			    0x2E  /* BDW+ */
+#define OP_MI_2F			    0x2F  /* BDW+ */
+#define OP_MI_BATCH_BUFFER_START            0x31
+
+/* Bit definition for dword 0 */
+#define _CMDBIT_BB_START_IN_PPGTT	(1UL << 8)
+
+#define OP_MI_CONDITIONAL_BATCH_BUFFER_END  0x36
+
+#define BATCH_BUFFER_ADDR_MASK ((1UL << 32) - (1U << 2))
+#define BATCH_BUFFER_ADDR_HIGH_MASK ((1UL << 16) - (1U))
+#define BATCH_BUFFER_ADR_SPACE_BIT(x)	(((x) >> 8) & 1U)
+#define BATCH_BUFFER_2ND_LEVEL_BIT(x)   ((x) >> 22 & 1U)
+
+/* 2D command: Opcode (28:22) */
+#define OP_2D(x)    ((2<<7) | x)
+
+#define OP_XY_SETUP_BLT                             OP_2D(0x1)
+#define OP_XY_SETUP_CLIP_BLT                        OP_2D(0x3)
+#define OP_XY_SETUP_MONO_PATTERN_SL_BLT             OP_2D(0x11)
+#define OP_XY_PIXEL_BLT                             OP_2D(0x24)
+#define OP_XY_SCANLINES_BLT                         OP_2D(0x25)
+#define OP_XY_TEXT_BLT                              OP_2D(0x26)
+#define OP_XY_TEXT_IMMEDIATE_BLT                    OP_2D(0x31)
+#define OP_XY_COLOR_BLT                             OP_2D(0x50)
+#define OP_XY_PAT_BLT                               OP_2D(0x51)
+#define OP_XY_MONO_PAT_BLT                          OP_2D(0x52)
+#define OP_XY_SRC_COPY_BLT                          OP_2D(0x53)
+#define OP_XY_MONO_SRC_COPY_BLT                     OP_2D(0x54)
+#define OP_XY_FULL_BLT                              OP_2D(0x55)
+#define OP_XY_FULL_MONO_SRC_BLT                     OP_2D(0x56)
+#define OP_XY_FULL_MONO_PATTERN_BLT                 OP_2D(0x57)
+#define OP_XY_FULL_MONO_PATTERN_MONO_SRC_BLT        OP_2D(0x58)
+#define OP_XY_MONO_PAT_FIXED_BLT                    OP_2D(0x59)
+#define OP_XY_MONO_SRC_COPY_IMMEDIATE_BLT           OP_2D(0x71)
+#define OP_XY_PAT_BLT_IMMEDIATE                     OP_2D(0x72)
+#define OP_XY_SRC_COPY_CHROMA_BLT                   OP_2D(0x73)
+#define OP_XY_FULL_IMMEDIATE_PATTERN_BLT            OP_2D(0x74)
+#define OP_XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT   OP_2D(0x75)
+#define OP_XY_PAT_CHROMA_BLT                        OP_2D(0x76)
+#define OP_XY_PAT_CHROMA_BLT_IMMEDIATE              OP_2D(0x77)
+
+/* 3D/Media Command: Pipeline Type(28:27) Opcode(26:24) Sub Opcode(23:16) */
+#define OP_3D_MEDIA(sub_type, opcode, sub_opcode) \
+	((3 << 13) | ((sub_type) << 11) | ((opcode) << 8) | (sub_opcode))
+
+#define OP_STATE_PREFETCH                       OP_3D_MEDIA(0x0, 0x0, 0x03)
+
+#define OP_STATE_BASE_ADDRESS                   OP_3D_MEDIA(0x0, 0x1, 0x01)
+#define OP_STATE_SIP                            OP_3D_MEDIA(0x0, 0x1, 0x02)
+#define OP_3D_MEDIA_0_1_4			OP_3D_MEDIA(0x0, 0x1, 0x04)
+#define OP_SWTESS_BASE_ADDRESS			OP_3D_MEDIA(0x0, 0x1, 0x03)
+
+#define OP_3DSTATE_VF_STATISTICS_GM45           OP_3D_MEDIA(0x1, 0x0, 0x0B)
+
+#define OP_PIPELINE_SELECT                      OP_3D_MEDIA(0x1, 0x1, 0x04)
+
+#define OP_MEDIA_VFE_STATE                      OP_3D_MEDIA(0x2, 0x0, 0x0)
+#define OP_MEDIA_CURBE_LOAD                     OP_3D_MEDIA(0x2, 0x0, 0x1)
+#define OP_MEDIA_INTERFACE_DESCRIPTOR_LOAD      OP_3D_MEDIA(0x2, 0x0, 0x2)
+#define OP_MEDIA_GATEWAY_STATE                  OP_3D_MEDIA(0x2, 0x0, 0x3)
+#define OP_MEDIA_STATE_FLUSH                    OP_3D_MEDIA(0x2, 0x0, 0x4)
+#define OP_MEDIA_POOL_STATE                     OP_3D_MEDIA(0x2, 0x0, 0x5)
+
+#define OP_MEDIA_OBJECT                         OP_3D_MEDIA(0x2, 0x1, 0x0)
+#define OP_MEDIA_OBJECT_PRT                     OP_3D_MEDIA(0x2, 0x1, 0x2)
+#define OP_MEDIA_OBJECT_WALKER                  OP_3D_MEDIA(0x2, 0x1, 0x3)
+#define OP_GPGPU_WALKER                         OP_3D_MEDIA(0x2, 0x1, 0x5)
+
+#define OP_3DSTATE_CLEAR_PARAMS                 OP_3D_MEDIA(0x3, 0x0, 0x04) /* IVB+ */
+#define OP_3DSTATE_DEPTH_BUFFER                 OP_3D_MEDIA(0x3, 0x0, 0x05) /* IVB+ */
+#define OP_3DSTATE_STENCIL_BUFFER               OP_3D_MEDIA(0x3, 0x0, 0x06) /* IVB+ */
+#define OP_3DSTATE_HIER_DEPTH_BUFFER            OP_3D_MEDIA(0x3, 0x0, 0x07) /* IVB+ */
+#define OP_3DSTATE_VERTEX_BUFFERS               OP_3D_MEDIA(0x3, 0x0, 0x08)
+#define OP_3DSTATE_VERTEX_ELEMENTS              OP_3D_MEDIA(0x3, 0x0, 0x09)
+#define OP_3DSTATE_INDEX_BUFFER                 OP_3D_MEDIA(0x3, 0x0, 0x0A)
+#define OP_3DSTATE_VF_STATISTICS                OP_3D_MEDIA(0x3, 0x0, 0x0B)
+#define OP_3DSTATE_VF                           OP_3D_MEDIA(0x3, 0x0, 0x0C)  /* HSW+ */
+#define OP_3DSTATE_CC_STATE_POINTERS            OP_3D_MEDIA(0x3, 0x0, 0x0E)
+#define OP_3DSTATE_SCISSOR_STATE_POINTERS       OP_3D_MEDIA(0x3, 0x0, 0x0F)
+#define OP_3DSTATE_VS                           OP_3D_MEDIA(0x3, 0x0, 0x10)
+#define OP_3DSTATE_GS                           OP_3D_MEDIA(0x3, 0x0, 0x11)
+#define OP_3DSTATE_CLIP                         OP_3D_MEDIA(0x3, 0x0, 0x12)
+#define OP_3DSTATE_SF                           OP_3D_MEDIA(0x3, 0x0, 0x13)
+#define OP_3DSTATE_WM                           OP_3D_MEDIA(0x3, 0x0, 0x14)
+#define OP_3DSTATE_CONSTANT_VS                  OP_3D_MEDIA(0x3, 0x0, 0x15)
+#define OP_3DSTATE_CONSTANT_GS                  OP_3D_MEDIA(0x3, 0x0, 0x16)
+#define OP_3DSTATE_CONSTANT_PS                  OP_3D_MEDIA(0x3, 0x0, 0x17)
+#define OP_3DSTATE_SAMPLE_MASK                  OP_3D_MEDIA(0x3, 0x0, 0x18)
+#define OP_3DSTATE_CONSTANT_HS                  OP_3D_MEDIA(0x3, 0x0, 0x19) /* IVB+ */
+#define OP_3DSTATE_CONSTANT_DS                  OP_3D_MEDIA(0x3, 0x0, 0x1A) /* IVB+ */
+#define OP_3DSTATE_HS                           OP_3D_MEDIA(0x3, 0x0, 0x1B) /* IVB+ */
+#define OP_3DSTATE_TE                           OP_3D_MEDIA(0x3, 0x0, 0x1C) /* IVB+ */
+#define OP_3DSTATE_DS                           OP_3D_MEDIA(0x3, 0x0, 0x1D) /* IVB+ */
+#define OP_3DSTATE_STREAMOUT                    OP_3D_MEDIA(0x3, 0x0, 0x1E) /* IVB+ */
+#define OP_3DSTATE_SBE                          OP_3D_MEDIA(0x3, 0x0, 0x1F) /* IVB+ */
+#define OP_3DSTATE_PS                           OP_3D_MEDIA(0x3, 0x0, 0x20) /* IVB+ */
+#define OP_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP OP_3D_MEDIA(0x3, 0x0, 0x21) /* IVB+ */
+#define OP_3DSTATE_VIEWPORT_STATE_POINTERS_CC   OP_3D_MEDIA(0x3, 0x0, 0x23) /* IVB+ */
+#define OP_3DSTATE_BLEND_STATE_POINTERS         OP_3D_MEDIA(0x3, 0x0, 0x24) /* IVB+ */
+#define OP_3DSTATE_DEPTH_STENCIL_STATE_POINTERS OP_3D_MEDIA(0x3, 0x0, 0x25) /* IVB+ */
+#define OP_3DSTATE_BINDING_TABLE_POINTERS_VS    OP_3D_MEDIA(0x3, 0x0, 0x26) /* IVB+ */
+#define OP_3DSTATE_BINDING_TABLE_POINTERS_HS    OP_3D_MEDIA(0x3, 0x0, 0x27) /* IVB+ */
+#define OP_3DSTATE_BINDING_TABLE_POINTERS_DS    OP_3D_MEDIA(0x3, 0x0, 0x28) /* IVB+ */
+#define OP_3DSTATE_BINDING_TABLE_POINTERS_GS    OP_3D_MEDIA(0x3, 0x0, 0x29) /* IVB+ */
+#define OP_3DSTATE_BINDING_TABLE_POINTERS_PS    OP_3D_MEDIA(0x3, 0x0, 0x2A) /* IVB+ */
+#define OP_3DSTATE_SAMPLER_STATE_POINTERS_VS    OP_3D_MEDIA(0x3, 0x0, 0x2B) /* IVB+ */
+#define OP_3DSTATE_SAMPLER_STATE_POINTERS_HS    OP_3D_MEDIA(0x3, 0x0, 0x2C) /* IVB+ */
+#define OP_3DSTATE_SAMPLER_STATE_POINTERS_DS    OP_3D_MEDIA(0x3, 0x0, 0x2D) /* IVB+ */
+#define OP_3DSTATE_SAMPLER_STATE_POINTERS_GS    OP_3D_MEDIA(0x3, 0x0, 0x2E) /* IVB+ */
+#define OP_3DSTATE_SAMPLER_STATE_POINTERS_PS    OP_3D_MEDIA(0x3, 0x0, 0x2F) /* IVB+ */
+#define OP_3DSTATE_URB_VS                       OP_3D_MEDIA(0x3, 0x0, 0x30) /* IVB+ */
+#define OP_3DSTATE_URB_HS                       OP_3D_MEDIA(0x3, 0x0, 0x31) /* IVB+ */
+#define OP_3DSTATE_URB_DS                       OP_3D_MEDIA(0x3, 0x0, 0x32) /* IVB+ */
+#define OP_3DSTATE_URB_GS                       OP_3D_MEDIA(0x3, 0x0, 0x33) /* IVB+ */
+#define OP_3DSTATE_GATHER_CONSTANT_VS           OP_3D_MEDIA(0x3, 0x0, 0x34) /* HSW+ */
+#define OP_3DSTATE_GATHER_CONSTANT_GS           OP_3D_MEDIA(0x3, 0x0, 0x35) /* HSW+ */
+#define OP_3DSTATE_GATHER_CONSTANT_HS           OP_3D_MEDIA(0x3, 0x0, 0x36) /* HSW+ */
+#define OP_3DSTATE_GATHER_CONSTANT_DS           OP_3D_MEDIA(0x3, 0x0, 0x37) /* HSW+ */
+#define OP_3DSTATE_GATHER_CONSTANT_PS           OP_3D_MEDIA(0x3, 0x0, 0x38) /* HSW+ */
+#define OP_3DSTATE_DX9_CONSTANTF_VS             OP_3D_MEDIA(0x3, 0x0, 0x39) /* HSW+ */
+#define OP_3DSTATE_DX9_CONSTANTF_PS             OP_3D_MEDIA(0x3, 0x0, 0x3A) /* HSW+ */
+#define OP_3DSTATE_DX9_CONSTANTI_VS             OP_3D_MEDIA(0x3, 0x0, 0x3B) /* HSW+ */
+#define OP_3DSTATE_DX9_CONSTANTI_PS             OP_3D_MEDIA(0x3, 0x0, 0x3C) /* HSW+ */
+#define OP_3DSTATE_DX9_CONSTANTB_VS             OP_3D_MEDIA(0x3, 0x0, 0x3D) /* HSW+ */
+#define OP_3DSTATE_DX9_CONSTANTB_PS             OP_3D_MEDIA(0x3, 0x0, 0x3E) /* HSW+ */
+#define OP_3DSTATE_DX9_LOCAL_VALID_VS           OP_3D_MEDIA(0x3, 0x0, 0x3F) /* HSW+ */
+#define OP_3DSTATE_DX9_LOCAL_VALID_PS           OP_3D_MEDIA(0x3, 0x0, 0x40) /* HSW+ */
+#define OP_3DSTATE_DX9_GENERATE_ACTIVE_VS       OP_3D_MEDIA(0x3, 0x0, 0x41) /* HSW+ */
+#define OP_3DSTATE_DX9_GENERATE_ACTIVE_PS       OP_3D_MEDIA(0x3, 0x0, 0x42) /* HSW+ */
+#define OP_3DSTATE_BINDING_TABLE_EDIT_VS        OP_3D_MEDIA(0x3, 0x0, 0x43) /* HSW+ */
+#define OP_3DSTATE_BINDING_TABLE_EDIT_GS        OP_3D_MEDIA(0x3, 0x0, 0x44) /* HSW+ */
+#define OP_3DSTATE_BINDING_TABLE_EDIT_HS        OP_3D_MEDIA(0x3, 0x0, 0x45) /* HSW+ */
+#define OP_3DSTATE_BINDING_TABLE_EDIT_DS        OP_3D_MEDIA(0x3, 0x0, 0x46) /* HSW+ */
+#define OP_3DSTATE_BINDING_TABLE_EDIT_PS        OP_3D_MEDIA(0x3, 0x0, 0x47) /* HSW+ */
+
+#define OP_3DSTATE_VF_INSTANCING 		OP_3D_MEDIA(0x3, 0x0, 0x49) /* BDW+ */
+#define OP_3DSTATE_VF_SGVS  			OP_3D_MEDIA(0x3, 0x0, 0x4A) /* BDW+ */
+#define OP_3DSTATE_VF_TOPOLOGY   		OP_3D_MEDIA(0x3, 0x0, 0x4B) /* BDW+ */
+#define OP_3DSTATE_WM_CHROMAKEY   		OP_3D_MEDIA(0x3, 0x0, 0x4C) /* BDW+ */
+#define OP_3DSTATE_PS_BLEND   			OP_3D_MEDIA(0x3, 0x0, 0x4D) /* BDW+ */
+#define OP_3DSTATE_WM_DEPTH_STENCIL   		OP_3D_MEDIA(0x3, 0x0, 0x4E) /* BDW+ */
+#define OP_3DSTATE_PS_EXTRA   			OP_3D_MEDIA(0x3, 0x0, 0x4F) /* BDW+ */
+#define OP_3DSTATE_RASTER   			OP_3D_MEDIA(0x3, 0x0, 0x50) /* BDW+ */
+#define OP_3DSTATE_SBE_SWIZ   			OP_3D_MEDIA(0x3, 0x0, 0x51) /* BDW+ */
+#define OP_3DSTATE_WM_HZ_OP   			OP_3D_MEDIA(0x3, 0x0, 0x52) /* BDW+ */
+#define OP_3DSTATE_COMPONENT_PACKING		OP_3D_MEDIA(0x3, 0x0, 0x55) /* SKL+ */
+
+#define OP_3DSTATE_DRAWING_RECTANGLE            OP_3D_MEDIA(0x3, 0x1, 0x00)
+#define OP_3DSTATE_SAMPLER_PALETTE_LOAD0        OP_3D_MEDIA(0x3, 0x1, 0x02)
+#define OP_3DSTATE_CHROMA_KEY                   OP_3D_MEDIA(0x3, 0x1, 0x04)
+#define OP_SNB_3DSTATE_DEPTH_BUFFER             OP_3D_MEDIA(0x3, 0x1, 0x05)
+#define OP_3DSTATE_POLY_STIPPLE_OFFSET          OP_3D_MEDIA(0x3, 0x1, 0x06)
+#define OP_3DSTATE_POLY_STIPPLE_PATTERN         OP_3D_MEDIA(0x3, 0x1, 0x07)
+#define OP_3DSTATE_LINE_STIPPLE                 OP_3D_MEDIA(0x3, 0x1, 0x08)
+#define OP_3DSTATE_AA_LINE_PARAMS               OP_3D_MEDIA(0x3, 0x1, 0x0A)
+#define OP_3DSTATE_GS_SVB_INDEX                 OP_3D_MEDIA(0x3, 0x1, 0x0B)
+#define OP_3DSTATE_SAMPLER_PALETTE_LOAD1        OP_3D_MEDIA(0x3, 0x1, 0x0C)
+#define OP_3DSTATE_MULTISAMPLE_BDW		OP_3D_MEDIA(0x3, 0x0, 0x0D)
+#define OP_SNB_3DSTATE_STENCIL_BUFFER           OP_3D_MEDIA(0x3, 0x1, 0x0E)
+#define OP_SNB_3DSTATE_HIER_DEPTH_BUFFER        OP_3D_MEDIA(0x3, 0x1, 0x0F)
+#define OP_SNB_3DSTATE_CLEAR_PARAMS             OP_3D_MEDIA(0x3, 0x1, 0x10)
+#define OP_3DSTATE_MONOFILTER_SIZE              OP_3D_MEDIA(0x3, 0x1, 0x11)
+#define OP_3DSTATE_PUSH_CONSTANT_ALLOC_VS       OP_3D_MEDIA(0x3, 0x1, 0x12) /* IVB+ */
+#define OP_3DSTATE_PUSH_CONSTANT_ALLOC_HS       OP_3D_MEDIA(0x3, 0x1, 0x13) /* IVB+ */
+#define OP_3DSTATE_PUSH_CONSTANT_ALLOC_DS       OP_3D_MEDIA(0x3, 0x1, 0x14) /* IVB+ */
+#define OP_3DSTATE_PUSH_CONSTANT_ALLOC_GS       OP_3D_MEDIA(0x3, 0x1, 0x15) /* IVB+ */
+#define OP_3DSTATE_PUSH_CONSTANT_ALLOC_PS       OP_3D_MEDIA(0x3, 0x1, 0x16) /* IVB+ */
+#define OP_3DSTATE_SO_DECL_LIST                 OP_3D_MEDIA(0x3, 0x1, 0x17)
+#define OP_3DSTATE_SO_BUFFER                    OP_3D_MEDIA(0x3, 0x1, 0x18)
+#define OP_3DSTATE_BINDING_TABLE_POOL_ALLOC     OP_3D_MEDIA(0x3, 0x1, 0x19) /* HSW+ */
+#define OP_3DSTATE_GATHER_POOL_ALLOC            OP_3D_MEDIA(0x3, 0x1, 0x1A) /* HSW+ */
+#define OP_3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC OP_3D_MEDIA(0x3, 0x1, 0x1B) /* HSW+ */
+#define OP_3DSTATE_SAMPLE_PATTERN               OP_3D_MEDIA(0x3, 0x1, 0x1C)
+#define OP_PIPE_CONTROL                         OP_3D_MEDIA(0x3, 0x2, 0x00)
+#define OP_3DPRIMITIVE                          OP_3D_MEDIA(0x3, 0x3, 0x00)
+
+/* VCCP Command Parser */
+
+/*
+ * Below MFX and VBE cmd definition is from vaapi intel driver project (BSD License)
+ * git://anongit.freedesktop.org/vaapi/intel-driver
+ * src/i965_defines.h
+ *
+ */
+
+#define OP_MFX(pipeline, op, sub_opa, sub_opb)     \
+	(3 << 13 | \
+	 (pipeline) << 11 | \
+	 (op) << 8 | \
+	 (sub_opa) << 5 | \
+	 (sub_opb))
+
+#define OP_MFX_PIPE_MODE_SELECT                    OP_MFX(2, 0, 0, 0)  /* ALL */
+#define OP_MFX_SURFACE_STATE                       OP_MFX(2, 0, 0, 1)  /* ALL */
+#define OP_MFX_PIPE_BUF_ADDR_STATE                 OP_MFX(2, 0, 0, 2)  /* ALL */
+#define OP_MFX_IND_OBJ_BASE_ADDR_STATE             OP_MFX(2, 0, 0, 3)  /* ALL */
+#define OP_MFX_BSP_BUF_BASE_ADDR_STATE             OP_MFX(2, 0, 0, 4)  /* ALL */
+#define OP_2_0_0_5                                 OP_MFX(2, 0, 0, 5)  /* ALL */
+#define OP_MFX_STATE_POINTER                       OP_MFX(2, 0, 0, 6)  /* ALL */
+#define OP_MFX_QM_STATE                            OP_MFX(2, 0, 0, 7)  /* IVB+ */
+#define OP_MFX_FQM_STATE                           OP_MFX(2, 0, 0, 8)  /* IVB+ */
+#define OP_MFX_PAK_INSERT_OBJECT                   OP_MFX(2, 0, 2, 8)  /* IVB+ */
+#define OP_MFX_STITCH_OBJECT                       OP_MFX(2, 0, 2, 0xA)  /* IVB+ */
+
+#define OP_MFD_IT_OBJECT                           OP_MFX(2, 0, 1, 9) /* ALL */
+
+#define OP_MFX_WAIT                                OP_MFX(1, 0, 0, 0) /* IVB+ */
+#define OP_MFX_AVC_IMG_STATE                       OP_MFX(2, 1, 0, 0) /* ALL */
+#define OP_MFX_AVC_QM_STATE                        OP_MFX(2, 1, 0, 1) /* ALL */
+#define OP_MFX_AVC_DIRECTMODE_STATE                OP_MFX(2, 1, 0, 2) /* ALL */
+#define OP_MFX_AVC_SLICE_STATE                     OP_MFX(2, 1, 0, 3) /* ALL */
+#define OP_MFX_AVC_REF_IDX_STATE                   OP_MFX(2, 1, 0, 4) /* ALL */
+#define OP_MFX_AVC_WEIGHTOFFSET_STATE              OP_MFX(2, 1, 0, 5) /* ALL */
+#define OP_MFD_AVC_PICID_STATE                     OP_MFX(2, 1, 1, 5) /* HSW+ */
+#define OP_MFD_AVC_DPB_STATE			   OP_MFX(2, 1, 1, 6) /* IVB+ */
+#define OP_MFD_AVC_SLICEADDR                       OP_MFX(2, 1, 1, 7) /* IVB+ */
+#define OP_MFD_AVC_BSD_OBJECT                      OP_MFX(2, 1, 1, 8) /* ALL */
+#define OP_MFC_AVC_PAK_OBJECT                      OP_MFX(2, 1, 2, 9) /* ALL */
+
+#define OP_MFX_VC1_PRED_PIPE_STATE                 OP_MFX(2, 2, 0, 1) /* ALL */
+#define OP_MFX_VC1_DIRECTMODE_STATE                OP_MFX(2, 2, 0, 2) /* ALL */
+#define OP_MFD_VC1_SHORT_PIC_STATE                 OP_MFX(2, 2, 1, 0) /* IVB+ */
+#define OP_MFD_VC1_LONG_PIC_STATE                  OP_MFX(2, 2, 1, 1) /* IVB+ */
+#define OP_MFD_VC1_BSD_OBJECT                      OP_MFX(2, 2, 1, 8) /* ALL */
+
+#define OP_MFX_MPEG2_PIC_STATE                     OP_MFX(2, 3, 0, 0) /* ALL */
+#define OP_MFX_MPEG2_QM_STATE                      OP_MFX(2, 3, 0, 1) /* ALL */
+#define OP_MFD_MPEG2_BSD_OBJECT                    OP_MFX(2, 3, 1, 8) /* ALL */
+#define OP_MFC_MPEG2_SLICEGROUP_STATE              OP_MFX(2, 3, 2, 3) /* ALL */
+#define OP_MFC_MPEG2_PAK_OBJECT                    OP_MFX(2, 3, 2, 9) /* ALL */
+
+#define OP_MFX_2_6_0_0                             OP_MFX(2, 6, 0, 0) /* IVB+ */
+#define OP_MFX_2_6_0_8                             OP_MFX(2, 6, 0, 8) /* IVB+ */
+#define OP_MFX_2_6_0_9                             OP_MFX(2, 6, 0, 9) /* IVB+ */
+
+#define OP_MFX_JPEG_PIC_STATE                      OP_MFX(2, 7, 0, 0)
+#define OP_MFX_JPEG_HUFF_TABLE_STATE               OP_MFX(2, 7, 0, 2)
+#define OP_MFD_JPEG_BSD_OBJECT                     OP_MFX(2, 7, 1, 8)
+
+#define OP_VEB(pipeline, op, sub_opa, sub_opb) \
+	(3 << 13 | \
+	 (pipeline) << 11 | \
+	 (op) << 8 | \
+	 (sub_opa) << 5 | \
+	 (sub_opb))
+
+#define OP_VEB_SURFACE_STATE                       OP_VEB(2, 4, 0, 0)
+#define OP_VEB_STATE                               OP_VEB(2, 4, 0, 2)
+#define OP_VEB_DNDI_IECP_STATE                     OP_VEB(2, 4, 0, 3)
+
+struct parser_exec_state;
+
+typedef int (*parser_cmd_handler)(struct parser_exec_state *s);
+
+#define GVT_CMD_HASH_BITS   7
+
+/* which DWords need address fix */
+#define ADDR_FIX_1(x1)			(1 << (x1))
+#define ADDR_FIX_2(x1, x2)		(ADDR_FIX_1(x1) | ADDR_FIX_1(x2))
+#define ADDR_FIX_3(x1, x2, x3)		(ADDR_FIX_1(x1) | ADDR_FIX_2(x2, x3))
+#define ADDR_FIX_4(x1, x2, x3, x4)	(ADDR_FIX_1(x1) | ADDR_FIX_3(x2, x3, x4))
+#define ADDR_FIX_5(x1, x2, x3, x4, x5)  (ADDR_FIX_1(x1) | ADDR_FIX_4(x2, x3, x4, x5))
+
+#define DWORD_FIELD(dword, end, start) \
+	FIELD_GET(GENMASK(end, start), cmd_val(s, dword))
+
+#define OP_LENGTH_BIAS 2
+#define CMD_LEN(value)  (value + OP_LENGTH_BIAS)
+
+static int gvt_check_valid_cmd_length(int len, int valid_len)
+{
+	if (valid_len != len) {
+		gvt_err("len is not valid:  len=%u  valid_len=%u\n",
+			len, valid_len);
+		return -EFAULT;
+	}
+	return 0;
+}
+
+struct cmd_info {
+	const char *name;
+	u32 opcode;
+
+#define F_LEN_MASK	3U
+#define F_LEN_CONST  1U
+#define F_LEN_VAR    0U
+/* value is const although LEN maybe variable */
+#define F_LEN_VAR_FIXED    (1<<1)
+
+/*
+ * command has its own ip advance logic
+ * e.g. MI_BATCH_START, MI_BATCH_END
+ */
+#define F_IP_ADVANCE_CUSTOM (1<<2)
+	u32 flag;
+
+#define R_RCS	BIT(RCS0)
+#define R_VCS1  BIT(VCS0)
+#define R_VCS2  BIT(VCS1)
+#define R_VCS	(R_VCS1 | R_VCS2)
+#define R_BCS	BIT(BCS0)
+#define R_VECS	BIT(VECS0)
+#define R_ALL (R_RCS | R_VCS | R_BCS | R_VECS)
+	/* rings that support this cmd: BLT/RCS/VCS/VECS */
+	intel_engine_mask_t rings;
+
+	/* devices that support this cmd: SNB/IVB/HSW/... */
+	u16 devices;
+
+	/* which DWords are address that need fix up.
+	 * bit 0 means a 32-bit non address operand in command
+	 * bit 1 means address operand, which could be 32-bit
+	 * or 64-bit depending on different architectures.(
+	 * defined by "gmadr_bytes_in_cmd" in intel_gvt.
+	 * No matter the address length, each address only takes
+	 * one bit in the bitmap.
+	 */
+	u16 addr_bitmap;
+
+	/* flag == F_LEN_CONST : command length
+	 * flag == F_LEN_VAR : length bias bits
+	 * Note: length is in DWord
+	 */
+	u32 len;
+
+	parser_cmd_handler handler;
+
+	/* valid length in DWord */
+	u32 valid_len;
+};
+
+struct cmd_entry {
+	struct hlist_node hlist;
+	const struct cmd_info *info;
+};
+
+enum {
+	RING_BUFFER_INSTRUCTION,
+	BATCH_BUFFER_INSTRUCTION,
+	BATCH_BUFFER_2ND_LEVEL,
+	RING_BUFFER_CTX,
+};
+
+enum {
+	GTT_BUFFER,
+	PPGTT_BUFFER
+};
+
+struct parser_exec_state {
+	struct intel_vgpu *vgpu;
+	const struct intel_engine_cs *engine;
+
+	int buf_type;
+
+	/* batch buffer address type */
+	int buf_addr_type;
+
+	/* graphics memory address of ring buffer start */
+	unsigned long ring_start;
+	unsigned long ring_size;
+	unsigned long ring_head;
+	unsigned long ring_tail;
+
+	/* instruction graphics memory address */
+	unsigned long ip_gma;
+
+	/* mapped va of the instr_gma */
+	void *ip_va;
+	void *rb_va;
+
+	void *ret_bb_va;
+	/* next instruction when return from  batch buffer to ring buffer */
+	unsigned long ret_ip_gma_ring;
+
+	/* next instruction when return from 2nd batch buffer to batch buffer */
+	unsigned long ret_ip_gma_bb;
+
+	/* batch buffer address type (GTT or PPGTT)
+	 * used when ret from 2nd level batch buffer
+	 */
+	int saved_buf_addr_type;
+	bool is_ctx_wa;
+	bool is_init_ctx;
+
+	const struct cmd_info *info;
+
+	struct intel_vgpu_workload *workload;
+};
+
+#define gmadr_dw_number(s)	\
+	(s->vgpu->gvt->device_info.gmadr_bytes_in_cmd >> 2)
+
+static unsigned long bypass_scan_mask = 0;
+
+/* ring ALL, type = 0 */
+static const struct sub_op_bits sub_op_mi[] = {
+	{31, 29},
+	{28, 23},
+};
+
+static const struct decode_info decode_info_mi = {
+	"MI",
+	OP_LEN_MI,
+	ARRAY_SIZE(sub_op_mi),
+	sub_op_mi,
+};
+
+/* ring RCS, command type 2 */
+static const struct sub_op_bits sub_op_2d[] = {
+	{31, 29},
+	{28, 22},
+};
+
+static const struct decode_info decode_info_2d = {
+	"2D",
+	OP_LEN_2D,
+	ARRAY_SIZE(sub_op_2d),
+	sub_op_2d,
+};
+
+/* ring RCS, command type 3 */
+static const struct sub_op_bits sub_op_3d_media[] = {
+	{31, 29},
+	{28, 27},
+	{26, 24},
+	{23, 16},
+};
+
+static const struct decode_info decode_info_3d_media = {
+	"3D_Media",
+	OP_LEN_3D_MEDIA,
+	ARRAY_SIZE(sub_op_3d_media),
+	sub_op_3d_media,
+};
+
+/* ring VCS, command type 3 */
+static const struct sub_op_bits sub_op_mfx_vc[] = {
+	{31, 29},
+	{28, 27},
+	{26, 24},
+	{23, 21},
+	{20, 16},
+};
+
+static const struct decode_info decode_info_mfx_vc = {
+	"MFX_VC",
+	OP_LEN_MFX_VC,
+	ARRAY_SIZE(sub_op_mfx_vc),
+	sub_op_mfx_vc,
+};
+
+/* ring VECS, command type 3 */
+static const struct sub_op_bits sub_op_vebox[] = {
+	{31, 29},
+	{28, 27},
+	{26, 24},
+	{23, 21},
+	{20, 16},
+};
+
+static const struct decode_info decode_info_vebox = {
+	"VEBOX",
+	OP_LEN_VEBOX,
+	ARRAY_SIZE(sub_op_vebox),
+	sub_op_vebox,
+};
+
+static const struct decode_info *ring_decode_info[I915_NUM_ENGINES][8] = {
+	[RCS0] = {
+		&decode_info_mi,
+		NULL,
+		NULL,
+		&decode_info_3d_media,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+	},
+
+	[VCS0] = {
+		&decode_info_mi,
+		NULL,
+		NULL,
+		&decode_info_mfx_vc,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+	},
+
+	[BCS0] = {
+		&decode_info_mi,
+		NULL,
+		&decode_info_2d,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+	},
+
+	[VECS0] = {
+		&decode_info_mi,
+		NULL,
+		NULL,
+		&decode_info_vebox,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+	},
+
+	[VCS1] = {
+		&decode_info_mi,
+		NULL,
+		NULL,
+		&decode_info_mfx_vc,
+		NULL,
+		NULL,
+		NULL,
+		NULL,
+	},
+};
+
+static inline u32 get_opcode(u32 cmd, const struct intel_engine_cs *engine)
+{
+	const struct decode_info *d_info;
+
+	d_info = ring_decode_info[engine->id][CMD_TYPE(cmd)];
+	if (d_info == NULL)
+		return INVALID_OP;
+
+	return cmd >> (32 - d_info->op_len);
+}
+
+static inline const struct cmd_info *
+find_cmd_entry(struct intel_gvt *gvt, unsigned int opcode,
+	       const struct intel_engine_cs *engine)
+{
+	struct cmd_entry *e;
+
+	hash_for_each_possible(gvt->cmd_table, e, hlist, opcode) {
+		if (opcode == e->info->opcode &&
+		    e->info->rings & engine->mask)
+			return e->info;
+	}
+	return NULL;
+}
+
+static inline const struct cmd_info *
+get_cmd_info(struct intel_gvt *gvt, u32 cmd,
+	     const struct intel_engine_cs *engine)
+{
+	u32 opcode;
+
+	opcode = get_opcode(cmd, engine);
+	if (opcode == INVALID_OP)
+		return NULL;
+
+	return find_cmd_entry(gvt, opcode, engine);
+}
+
+static inline u32 sub_op_val(u32 cmd, u32 hi, u32 low)
+{
+	return (cmd >> low) & ((1U << (hi - low + 1)) - 1);
+}
+
+static inline void print_opcode(u32 cmd, const struct intel_engine_cs *engine)
+{
+	const struct decode_info *d_info;
+	int i;
+
+	d_info = ring_decode_info[engine->id][CMD_TYPE(cmd)];
+	if (d_info == NULL)
+		return;
+
+	gvt_dbg_cmd("opcode=0x%x %s sub_ops:",
+			cmd >> (32 - d_info->op_len), d_info->name);
+
+	for (i = 0; i < d_info->nr_sub_op; i++)
+		pr_err("0x%x ", sub_op_val(cmd, d_info->sub_op[i].hi,
+					d_info->sub_op[i].low));
+
+	pr_err("\n");
+}
+
+static inline u32 *cmd_ptr(struct parser_exec_state *s, int index)
+{
+	return s->ip_va + (index << 2);
+}
+
+static inline u32 cmd_val(struct parser_exec_state *s, int index)
+{
+	return *cmd_ptr(s, index);
+}
+
+static inline bool is_init_ctx(struct parser_exec_state *s)
+{
+	return (s->buf_type == RING_BUFFER_CTX && s->is_init_ctx);
+}
+
+static void parser_exec_state_dump(struct parser_exec_state *s)
+{
+	int cnt = 0;
+	int i;
+
+	gvt_dbg_cmd("  vgpu%d RING%s: ring_start(%08lx) ring_end(%08lx)"
+		    " ring_head(%08lx) ring_tail(%08lx)\n",
+		    s->vgpu->id, s->engine->name,
+		    s->ring_start, s->ring_start + s->ring_size,
+		    s->ring_head, s->ring_tail);
+
+	gvt_dbg_cmd("  %s %s ip_gma(%08lx) ",
+			s->buf_type == RING_BUFFER_INSTRUCTION ?
+			"RING_BUFFER" : ((s->buf_type == RING_BUFFER_CTX) ?
+				"CTX_BUFFER" : "BATCH_BUFFER"),
+			s->buf_addr_type == GTT_BUFFER ?
+			"GTT" : "PPGTT", s->ip_gma);
+
+	if (s->ip_va == NULL) {
+		gvt_dbg_cmd(" ip_va(NULL)");
+		return;
+	}
+
+	gvt_dbg_cmd("  ip_va=%p: %08x %08x %08x %08x\n",
+			s->ip_va, cmd_val(s, 0), cmd_val(s, 1),
+			cmd_val(s, 2), cmd_val(s, 3));
+
+	print_opcode(cmd_val(s, 0), s->engine);
+
+	s->ip_va = (u32 *)((((u64)s->ip_va) >> 12) << 12);
+
+	while (cnt < 1024) {
+		gvt_dbg_cmd("ip_va=%p: ", s->ip_va);
+		for (i = 0; i < 8; i++)
+			gvt_dbg_cmd("%08x ", cmd_val(s, i));
+		gvt_dbg_cmd("\n");
+
+		s->ip_va += 8 * sizeof(u32);
+		cnt += 8;
+	}
+}
+
+static inline void update_ip_va(struct parser_exec_state *s)
+{
+	unsigned long len = 0;
+
+	if (WARN_ON(s->ring_head == s->ring_tail))
+		return;
+
+	if (s->buf_type == RING_BUFFER_INSTRUCTION ||
+			s->buf_type == RING_BUFFER_CTX) {
+		unsigned long ring_top = s->ring_start + s->ring_size;
+
+		if (s->ring_head > s->ring_tail) {
+			if (s->ip_gma >= s->ring_head && s->ip_gma < ring_top)
+				len = (s->ip_gma - s->ring_head);
+			else if (s->ip_gma >= s->ring_start &&
+					s->ip_gma <= s->ring_tail)
+				len = (ring_top - s->ring_head) +
+					(s->ip_gma - s->ring_start);
+		} else
+			len = (s->ip_gma - s->ring_head);
+
+		s->ip_va = s->rb_va + len;
+	} else {/* shadow batch buffer */
+		s->ip_va = s->ret_bb_va;
+	}
+}
+
+static inline int ip_gma_set(struct parser_exec_state *s,
+		unsigned long ip_gma)
+{
+	WARN_ON(!IS_ALIGNED(ip_gma, 4));
+
+	s->ip_gma = ip_gma;
+	update_ip_va(s);
+	return 0;
+}
+
+static inline int ip_gma_advance(struct parser_exec_state *s,
+		unsigned int dw_len)
+{
+	s->ip_gma += (dw_len << 2);
+
+	if (s->buf_type == RING_BUFFER_INSTRUCTION) {
+		if (s->ip_gma >= s->ring_start + s->ring_size)
+			s->ip_gma -= s->ring_size;
+		update_ip_va(s);
+	} else {
+		s->ip_va += (dw_len << 2);
+	}
+
+	return 0;
+}
+
+static inline int get_cmd_length(const struct cmd_info *info, u32 cmd)
+{
+	if ((info->flag & F_LEN_MASK) == F_LEN_CONST)
+		return info->len;
+	else
+		return (cmd & ((1U << info->len) - 1)) + 2;
+	return 0;
+}
+
+static inline int cmd_length(struct parser_exec_state *s)
+{
+	return get_cmd_length(s->info, cmd_val(s, 0));
+}
+
+/* do not remove this, some platform may need clflush here */
+#define patch_value(s, addr, val) do { \
+	*addr = val; \
+} while (0)
+
+static inline bool is_mocs_mmio(unsigned int offset)
+{
+	return ((offset >= 0xc800) && (offset <= 0xcff8)) ||
+		((offset >= 0xb020) && (offset <= 0xb0a0));
+}
+
+static int is_cmd_update_pdps(unsigned int offset,
+			      struct parser_exec_state *s)
+{
+	u32 base = s->workload->engine->mmio_base;
+	return i915_mmio_reg_equal(_MMIO(offset), GEN8_RING_PDP_UDW(base, 0));
+}
+
+static int cmd_pdp_mmio_update_handler(struct parser_exec_state *s,
+				       unsigned int offset, unsigned int index)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+	struct intel_vgpu_mm *shadow_mm = s->workload->shadow_mm;
+	struct intel_vgpu_mm *mm;
+	u64 pdps[GEN8_3LVL_PDPES];
+
+	if (shadow_mm->ppgtt_mm.root_entry_type ==
+	    GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+		pdps[0] = (u64)cmd_val(s, 2) << 32;
+		pdps[0] |= cmd_val(s, 4);
+
+		mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
+		if (!mm) {
+			gvt_vgpu_err("failed to get the 4-level shadow vm\n");
+			return -EINVAL;
+		}
+		intel_vgpu_mm_get(mm);
+		list_add_tail(&mm->ppgtt_mm.link,
+			      &s->workload->lri_shadow_mm);
+		*cmd_ptr(s, 2) = upper_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+		*cmd_ptr(s, 4) = lower_32_bits(mm->ppgtt_mm.shadow_pdps[0]);
+	} else {
+		/* Currently all guests use PML4 table and now can't
+		 * have a guest with 3-level table but uses LRI for
+		 * PPGTT update. So this is simply un-testable. */
+		GEM_BUG_ON(1);
+		gvt_vgpu_err("invalid shared shadow vm type\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int cmd_reg_handler(struct parser_exec_state *s,
+	unsigned int offset, unsigned int index, char *cmd)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+	struct intel_gvt *gvt = vgpu->gvt;
+	u32 ctx_sr_ctl;
+	u32 *vreg, vreg_old;
+
+	if (offset + 4 > gvt->device_info.mmio_size) {
+		gvt_vgpu_err("%s access to (%x) outside of MMIO range\n",
+				cmd, offset);
+		return -EFAULT;
+	}
+
+	if (is_init_ctx(s)) {
+		struct intel_gvt_mmio_info *mmio_info;
+
+		intel_gvt_mmio_set_cmd_accessible(gvt, offset);
+		mmio_info = intel_gvt_find_mmio_info(gvt, offset);
+		if (mmio_info && mmio_info->write)
+			intel_gvt_mmio_set_cmd_write_patch(gvt, offset);
+		return 0;
+	}
+
+	if (!intel_gvt_mmio_is_cmd_accessible(gvt, offset)) {
+		gvt_vgpu_err("%s access to non-render register (%x)\n",
+				cmd, offset);
+		return -EBADRQC;
+	}
+
+	if (!strncmp(cmd, "srm", 3) ||
+			!strncmp(cmd, "lrm", 3)) {
+		if (offset == i915_mmio_reg_offset(GEN8_L3SQCREG4) ||
+		    offset == 0x21f0 ||
+		    (IS_BROADWELL(gvt->gt->i915) &&
+		     offset == i915_mmio_reg_offset(INSTPM)))
+			return 0;
+		else {
+			gvt_vgpu_err("%s access to register (%x)\n",
+					cmd, offset);
+			return -EPERM;
+		}
+	}
+
+	if (!strncmp(cmd, "lrr-src", 7) ||
+			!strncmp(cmd, "lrr-dst", 7)) {
+		if (IS_BROADWELL(gvt->gt->i915) && offset == 0x215c)
+			return 0;
+		else {
+			gvt_vgpu_err("not allowed cmd %s reg (%x)\n", cmd, offset);
+			return -EPERM;
+		}
+	}
+
+	if (!strncmp(cmd, "pipe_ctrl", 9)) {
+		/* TODO: add LRI POST logic here */
+		return 0;
+	}
+
+	if (strncmp(cmd, "lri", 3))
+		return -EPERM;
+
+	/* below are all lri handlers */
+	vreg = &vgpu_vreg(s->vgpu, offset);
+
+	if (is_cmd_update_pdps(offset, s) &&
+	    cmd_pdp_mmio_update_handler(s, offset, index))
+		return -EINVAL;
+
+	if (offset == i915_mmio_reg_offset(DERRMR) ||
+		offset == i915_mmio_reg_offset(FORCEWAKE_MT)) {
+		/* Writing to HW VGT_PVINFO_PAGE offset will be discarded */
+		patch_value(s, cmd_ptr(s, index), VGT_PVINFO_PAGE);
+	}
+
+	if (is_mocs_mmio(offset))
+		*vreg = cmd_val(s, index + 1);
+
+	vreg_old = *vreg;
+
+	if (intel_gvt_mmio_is_cmd_write_patch(gvt, offset)) {
+		u32 cmdval_new, cmdval;
+		struct intel_gvt_mmio_info *mmio_info;
+
+		cmdval = cmd_val(s, index + 1);
+
+		mmio_info = intel_gvt_find_mmio_info(gvt, offset);
+		if (!mmio_info) {
+			cmdval_new = cmdval;
+		} else {
+			u64 ro_mask = mmio_info->ro_mask;
+			int ret;
+
+			if (likely(!ro_mask))
+				ret = mmio_info->write(s->vgpu, offset,
+						&cmdval, 4);
+			else {
+				gvt_vgpu_err("try to write RO reg %x\n",
+						offset);
+				ret = -EBADRQC;
+			}
+			if (ret)
+				return ret;
+			cmdval_new = *vreg;
+		}
+		if (cmdval_new != cmdval)
+			patch_value(s, cmd_ptr(s, index+1), cmdval_new);
+	}
+
+	/* only patch cmd. restore vreg value if changed in mmio write handler*/
+	*vreg = vreg_old;
+
+	/* TODO
+	 * In order to let workload with inhibit context to generate
+	 * correct image data into memory, vregs values will be loaded to
+	 * hw via LRIs in the workload with inhibit context. But as
+	 * indirect context is loaded prior to LRIs in workload, we don't
+	 * want reg values specified in indirect context overwritten by
+	 * LRIs in workloads. So, when scanning an indirect context, we
+	 * update reg values in it into vregs, so LRIs in workload with
+	 * inhibit context will restore with correct values
+	 */
+	if (GRAPHICS_VER(s->engine->i915) == 9 &&
+	    intel_gvt_mmio_is_sr_in_ctx(gvt, offset) &&
+	    !strncmp(cmd, "lri", 3)) {
+		intel_gvt_read_gpa(s->vgpu,
+			s->workload->ring_context_gpa + 12, &ctx_sr_ctl, 4);
+		/* check inhibit context */
+		if (ctx_sr_ctl & 1) {
+			u32 data = cmd_val(s, index + 1);
+
+			if (intel_gvt_mmio_has_mode_mask(s->vgpu->gvt, offset))
+				intel_vgpu_mask_mmio_write(vgpu,
+							offset, &data, 4);
+			else
+				vgpu_vreg(vgpu, offset) = data;
+		}
+	}
+
+	return 0;
+}
+
+#define cmd_reg(s, i) \
+	(cmd_val(s, i) & GENMASK(22, 2))
+
+#define cmd_reg_inhibit(s, i) \
+	(cmd_val(s, i) & GENMASK(22, 18))
+
+#define cmd_gma(s, i) \
+	(cmd_val(s, i) & GENMASK(31, 2))
+
+#define cmd_gma_hi(s, i) \
+	(cmd_val(s, i) & GENMASK(15, 0))
+
+static int cmd_handler_lri(struct parser_exec_state *s)
+{
+	int i, ret = 0;
+	int cmd_len = cmd_length(s);
+
+	for (i = 1; i < cmd_len; i += 2) {
+		if (IS_BROADWELL(s->engine->i915) && s->engine->id != RCS0) {
+			if (s->engine->id == BCS0 &&
+			    cmd_reg(s, i) == i915_mmio_reg_offset(DERRMR))
+				ret |= 0;
+			else
+				ret |= cmd_reg_inhibit(s, i) ? -EBADRQC : 0;
+		}
+		if (ret)
+			break;
+		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lri");
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static int cmd_handler_lrr(struct parser_exec_state *s)
+{
+	int i, ret = 0;
+	int cmd_len = cmd_length(s);
+
+	for (i = 1; i < cmd_len; i += 2) {
+		if (IS_BROADWELL(s->engine->i915))
+			ret |= ((cmd_reg_inhibit(s, i) ||
+				 (cmd_reg_inhibit(s, i + 1)))) ?
+				-EBADRQC : 0;
+		if (ret)
+			break;
+		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrr-src");
+		if (ret)
+			break;
+		ret |= cmd_reg_handler(s, cmd_reg(s, i + 1), i, "lrr-dst");
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+static inline int cmd_address_audit(struct parser_exec_state *s,
+		unsigned long guest_gma, int op_size, bool index_mode);
+
+static int cmd_handler_lrm(struct parser_exec_state *s)
+{
+	struct intel_gvt *gvt = s->vgpu->gvt;
+	int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd;
+	unsigned long gma;
+	int i, ret = 0;
+	int cmd_len = cmd_length(s);
+
+	for (i = 1; i < cmd_len;) {
+		if (IS_BROADWELL(s->engine->i915))
+			ret |= (cmd_reg_inhibit(s, i)) ? -EBADRQC : 0;
+		if (ret)
+			break;
+		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "lrm");
+		if (ret)
+			break;
+		if (cmd_val(s, 0) & (1 << 22)) {
+			gma = cmd_gma(s, i + 1);
+			if (gmadr_bytes == 8)
+				gma |= (cmd_gma_hi(s, i + 2)) << 32;
+			ret |= cmd_address_audit(s, gma, sizeof(u32), false);
+			if (ret)
+				break;
+		}
+		i += gmadr_dw_number(s) + 1;
+	}
+	return ret;
+}
+
+static int cmd_handler_srm(struct parser_exec_state *s)
+{
+	int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	unsigned long gma;
+	int i, ret = 0;
+	int cmd_len = cmd_length(s);
+
+	for (i = 1; i < cmd_len;) {
+		ret |= cmd_reg_handler(s, cmd_reg(s, i), i, "srm");
+		if (ret)
+			break;
+		if (cmd_val(s, 0) & (1 << 22)) {
+			gma = cmd_gma(s, i + 1);
+			if (gmadr_bytes == 8)
+				gma |= (cmd_gma_hi(s, i + 2)) << 32;
+			ret |= cmd_address_audit(s, gma, sizeof(u32), false);
+			if (ret)
+				break;
+		}
+		i += gmadr_dw_number(s) + 1;
+	}
+	return ret;
+}
+
+struct cmd_interrupt_event {
+	int pipe_control_notify;
+	int mi_flush_dw;
+	int mi_user_interrupt;
+};
+
+static const struct cmd_interrupt_event cmd_interrupt_events[] = {
+	[RCS0] = {
+		.pipe_control_notify = RCS_PIPE_CONTROL,
+		.mi_flush_dw = INTEL_GVT_EVENT_RESERVED,
+		.mi_user_interrupt = RCS_MI_USER_INTERRUPT,
+	},
+	[BCS0] = {
+		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
+		.mi_flush_dw = BCS_MI_FLUSH_DW,
+		.mi_user_interrupt = BCS_MI_USER_INTERRUPT,
+	},
+	[VCS0] = {
+		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
+		.mi_flush_dw = VCS_MI_FLUSH_DW,
+		.mi_user_interrupt = VCS_MI_USER_INTERRUPT,
+	},
+	[VCS1] = {
+		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
+		.mi_flush_dw = VCS2_MI_FLUSH_DW,
+		.mi_user_interrupt = VCS2_MI_USER_INTERRUPT,
+	},
+	[VECS0] = {
+		.pipe_control_notify = INTEL_GVT_EVENT_RESERVED,
+		.mi_flush_dw = VECS_MI_FLUSH_DW,
+		.mi_user_interrupt = VECS_MI_USER_INTERRUPT,
+	},
+};
+
+static int cmd_handler_pipe_control(struct parser_exec_state *s)
+{
+	int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	unsigned long gma;
+	bool index_mode = false;
+	unsigned int post_sync;
+	int ret = 0;
+	u32 hws_pga, val;
+
+	post_sync = (cmd_val(s, 1) & PIPE_CONTROL_POST_SYNC_OP_MASK) >> 14;
+
+	/* LRI post sync */
+	if (cmd_val(s, 1) & PIPE_CONTROL_MMIO_WRITE)
+		ret = cmd_reg_handler(s, cmd_reg(s, 2), 1, "pipe_ctrl");
+	/* post sync */
+	else if (post_sync) {
+		if (post_sync == 2)
+			ret = cmd_reg_handler(s, 0x2350, 1, "pipe_ctrl");
+		else if (post_sync == 3)
+			ret = cmd_reg_handler(s, 0x2358, 1, "pipe_ctrl");
+		else if (post_sync == 1) {
+			/* check ggtt*/
+			if ((cmd_val(s, 1) & PIPE_CONTROL_GLOBAL_GTT_IVB)) {
+				gma = cmd_val(s, 2) & GENMASK(31, 3);
+				if (gmadr_bytes == 8)
+					gma |= (cmd_gma_hi(s, 3)) << 32;
+				/* Store Data Index */
+				if (cmd_val(s, 1) & (1 << 21))
+					index_mode = true;
+				ret |= cmd_address_audit(s, gma, sizeof(u64),
+						index_mode);
+				if (ret)
+					return ret;
+				if (index_mode) {
+					hws_pga = s->vgpu->hws_pga[s->engine->id];
+					gma = hws_pga + gma;
+					patch_value(s, cmd_ptr(s, 2), gma);
+					val = cmd_val(s, 1) & (~(1 << 21));
+					patch_value(s, cmd_ptr(s, 1), val);
+				}
+			}
+		}
+	}
+
+	if (ret)
+		return ret;
+
+	if (cmd_val(s, 1) & PIPE_CONTROL_NOTIFY)
+		set_bit(cmd_interrupt_events[s->engine->id].pipe_control_notify,
+			s->workload->pending_events);
+	return 0;
+}
+
+static int cmd_handler_mi_user_interrupt(struct parser_exec_state *s)
+{
+	set_bit(cmd_interrupt_events[s->engine->id].mi_user_interrupt,
+		s->workload->pending_events);
+	patch_value(s, cmd_ptr(s, 0), MI_NOOP);
+	return 0;
+}
+
+static int cmd_advance_default(struct parser_exec_state *s)
+{
+	return ip_gma_advance(s, cmd_length(s));
+}
+
+static int cmd_handler_mi_batch_buffer_end(struct parser_exec_state *s)
+{
+	int ret;
+
+	if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
+		s->buf_type = BATCH_BUFFER_INSTRUCTION;
+		ret = ip_gma_set(s, s->ret_ip_gma_bb);
+		s->buf_addr_type = s->saved_buf_addr_type;
+	} else if (s->buf_type == RING_BUFFER_CTX) {
+		ret = ip_gma_set(s, s->ring_tail);
+	} else {
+		s->buf_type = RING_BUFFER_INSTRUCTION;
+		s->buf_addr_type = GTT_BUFFER;
+		if (s->ret_ip_gma_ring >= s->ring_start + s->ring_size)
+			s->ret_ip_gma_ring -= s->ring_size;
+		ret = ip_gma_set(s, s->ret_ip_gma_ring);
+	}
+	return ret;
+}
+
+struct mi_display_flip_command_info {
+	int pipe;
+	int plane;
+	int event;
+	i915_reg_t stride_reg;
+	i915_reg_t ctrl_reg;
+	i915_reg_t surf_reg;
+	u64 stride_val;
+	u64 tile_val;
+	u64 surf_val;
+	bool async_flip;
+};
+
+struct plane_code_mapping {
+	int pipe;
+	int plane;
+	int event;
+};
+
+static int gen8_decode_mi_display_flip(struct parser_exec_state *s,
+		struct mi_display_flip_command_info *info)
+{
+	struct drm_i915_private *dev_priv = s->engine->i915;
+	struct plane_code_mapping gen8_plane_code[] = {
+		[0] = {PIPE_A, PLANE_A, PRIMARY_A_FLIP_DONE},
+		[1] = {PIPE_B, PLANE_A, PRIMARY_B_FLIP_DONE},
+		[2] = {PIPE_A, PLANE_B, SPRITE_A_FLIP_DONE},
+		[3] = {PIPE_B, PLANE_B, SPRITE_B_FLIP_DONE},
+		[4] = {PIPE_C, PLANE_A, PRIMARY_C_FLIP_DONE},
+		[5] = {PIPE_C, PLANE_B, SPRITE_C_FLIP_DONE},
+	};
+	u32 dword0, dword1, dword2;
+	u32 v;
+
+	dword0 = cmd_val(s, 0);
+	dword1 = cmd_val(s, 1);
+	dword2 = cmd_val(s, 2);
+
+	v = (dword0 & GENMASK(21, 19)) >> 19;
+	if (drm_WARN_ON(&dev_priv->drm, v >= ARRAY_SIZE(gen8_plane_code)))
+		return -EBADRQC;
+
+	info->pipe = gen8_plane_code[v].pipe;
+	info->plane = gen8_plane_code[v].plane;
+	info->event = gen8_plane_code[v].event;
+	info->stride_val = (dword1 & GENMASK(15, 6)) >> 6;
+	info->tile_val = (dword1 & 0x1);
+	info->surf_val = (dword2 & GENMASK(31, 12)) >> 12;
+	info->async_flip = ((dword2 & GENMASK(1, 0)) == 0x1);
+
+	if (info->plane == PLANE_A) {
+		info->ctrl_reg = DSPCNTR(info->pipe);
+		info->stride_reg = DSPSTRIDE(info->pipe);
+		info->surf_reg = DSPSURF(info->pipe);
+	} else if (info->plane == PLANE_B) {
+		info->ctrl_reg = SPRCTL(info->pipe);
+		info->stride_reg = SPRSTRIDE(info->pipe);
+		info->surf_reg = SPRSURF(info->pipe);
+	} else {
+		drm_WARN_ON(&dev_priv->drm, 1);
+		return -EBADRQC;
+	}
+	return 0;
+}
+
+static int skl_decode_mi_display_flip(struct parser_exec_state *s,
+		struct mi_display_flip_command_info *info)
+{
+	struct drm_i915_private *dev_priv = s->engine->i915;
+	struct intel_vgpu *vgpu = s->vgpu;
+	u32 dword0 = cmd_val(s, 0);
+	u32 dword1 = cmd_val(s, 1);
+	u32 dword2 = cmd_val(s, 2);
+	u32 plane = (dword0 & GENMASK(12, 8)) >> 8;
+
+	info->plane = PRIMARY_PLANE;
+
+	switch (plane) {
+	case MI_DISPLAY_FLIP_SKL_PLANE_1_A:
+		info->pipe = PIPE_A;
+		info->event = PRIMARY_A_FLIP_DONE;
+		break;
+	case MI_DISPLAY_FLIP_SKL_PLANE_1_B:
+		info->pipe = PIPE_B;
+		info->event = PRIMARY_B_FLIP_DONE;
+		break;
+	case MI_DISPLAY_FLIP_SKL_PLANE_1_C:
+		info->pipe = PIPE_C;
+		info->event = PRIMARY_C_FLIP_DONE;
+		break;
+
+	case MI_DISPLAY_FLIP_SKL_PLANE_2_A:
+		info->pipe = PIPE_A;
+		info->event = SPRITE_A_FLIP_DONE;
+		info->plane = SPRITE_PLANE;
+		break;
+	case MI_DISPLAY_FLIP_SKL_PLANE_2_B:
+		info->pipe = PIPE_B;
+		info->event = SPRITE_B_FLIP_DONE;
+		info->plane = SPRITE_PLANE;
+		break;
+	case MI_DISPLAY_FLIP_SKL_PLANE_2_C:
+		info->pipe = PIPE_C;
+		info->event = SPRITE_C_FLIP_DONE;
+		info->plane = SPRITE_PLANE;
+		break;
+
+	default:
+		gvt_vgpu_err("unknown plane code %d\n", plane);
+		return -EBADRQC;
+	}
+
+	info->stride_val = (dword1 & GENMASK(15, 6)) >> 6;
+	info->tile_val = (dword1 & GENMASK(2, 0));
+	info->surf_val = (dword2 & GENMASK(31, 12)) >> 12;
+	info->async_flip = ((dword2 & GENMASK(1, 0)) == 0x1);
+
+	info->ctrl_reg = DSPCNTR(info->pipe);
+	info->stride_reg = DSPSTRIDE(info->pipe);
+	info->surf_reg = DSPSURF(info->pipe);
+
+	return 0;
+}
+
+static int gen8_check_mi_display_flip(struct parser_exec_state *s,
+		struct mi_display_flip_command_info *info)
+{
+	u32 stride, tile;
+
+	if (!info->async_flip)
+		return 0;
+
+	if (GRAPHICS_VER(s->engine->i915) >= 9) {
+		stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0);
+		tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) &
+				GENMASK(12, 10)) >> 10;
+	} else {
+		stride = (vgpu_vreg_t(s->vgpu, info->stride_reg) &
+				GENMASK(15, 6)) >> 6;
+		tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10;
+	}
+
+	if (stride != info->stride_val)
+		gvt_dbg_cmd("cannot change stride during async flip\n");
+
+	if (tile != info->tile_val)
+		gvt_dbg_cmd("cannot change tile during async flip\n");
+
+	return 0;
+}
+
+static int gen8_update_plane_mmio_from_mi_display_flip(
+		struct parser_exec_state *s,
+		struct mi_display_flip_command_info *info)
+{
+	struct drm_i915_private *dev_priv = s->engine->i915;
+	struct intel_vgpu *vgpu = s->vgpu;
+
+	set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12),
+		      info->surf_val << 12);
+	if (GRAPHICS_VER(dev_priv) >= 9) {
+		set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0),
+			      info->stride_val);
+		set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10),
+			      info->tile_val << 10);
+	} else {
+		set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(15, 6),
+			      info->stride_val << 6);
+		set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(10, 10),
+			      info->tile_val << 10);
+	}
+
+	if (info->plane == PLANE_PRIMARY)
+		vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(info->pipe))++;
+
+	if (info->async_flip)
+		intel_vgpu_trigger_virtual_event(vgpu, info->event);
+	else
+		set_bit(info->event, vgpu->irq.flip_done_event[info->pipe]);
+
+	return 0;
+}
+
+static int decode_mi_display_flip(struct parser_exec_state *s,
+		struct mi_display_flip_command_info *info)
+{
+	if (IS_BROADWELL(s->engine->i915))
+		return gen8_decode_mi_display_flip(s, info);
+	if (GRAPHICS_VER(s->engine->i915) >= 9)
+		return skl_decode_mi_display_flip(s, info);
+
+	return -ENODEV;
+}
+
+static int check_mi_display_flip(struct parser_exec_state *s,
+		struct mi_display_flip_command_info *info)
+{
+	return gen8_check_mi_display_flip(s, info);
+}
+
+static int update_plane_mmio_from_mi_display_flip(
+		struct parser_exec_state *s,
+		struct mi_display_flip_command_info *info)
+{
+	return gen8_update_plane_mmio_from_mi_display_flip(s, info);
+}
+
+static int cmd_handler_mi_display_flip(struct parser_exec_state *s)
+{
+	struct mi_display_flip_command_info info;
+	struct intel_vgpu *vgpu = s->vgpu;
+	int ret;
+	int i;
+	int len = cmd_length(s);
+	u32 valid_len = CMD_LEN(1);
+
+	/* Flip Type == Stereo 3D Flip */
+	if (DWORD_FIELD(2, 1, 0) == 2)
+		valid_len++;
+	ret = gvt_check_valid_cmd_length(cmd_length(s),
+			valid_len);
+	if (ret)
+		return ret;
+
+	ret = decode_mi_display_flip(s, &info);
+	if (ret) {
+		gvt_vgpu_err("fail to decode MI display flip command\n");
+		return ret;
+	}
+
+	ret = check_mi_display_flip(s, &info);
+	if (ret) {
+		gvt_vgpu_err("invalid MI display flip command\n");
+		return ret;
+	}
+
+	ret = update_plane_mmio_from_mi_display_flip(s, &info);
+	if (ret) {
+		gvt_vgpu_err("fail to update plane mmio\n");
+		return ret;
+	}
+
+	for (i = 0; i < len; i++)
+		patch_value(s, cmd_ptr(s, i), MI_NOOP);
+	return 0;
+}
+
+static bool is_wait_for_flip_pending(u32 cmd)
+{
+	return cmd & (MI_WAIT_FOR_PLANE_A_FLIP_PENDING |
+			MI_WAIT_FOR_PLANE_B_FLIP_PENDING |
+			MI_WAIT_FOR_PLANE_C_FLIP_PENDING |
+			MI_WAIT_FOR_SPRITE_A_FLIP_PENDING |
+			MI_WAIT_FOR_SPRITE_B_FLIP_PENDING |
+			MI_WAIT_FOR_SPRITE_C_FLIP_PENDING);
+}
+
+static int cmd_handler_mi_wait_for_event(struct parser_exec_state *s)
+{
+	u32 cmd = cmd_val(s, 0);
+
+	if (!is_wait_for_flip_pending(cmd))
+		return 0;
+
+	patch_value(s, cmd_ptr(s, 0), MI_NOOP);
+	return 0;
+}
+
+static unsigned long get_gma_bb_from_cmd(struct parser_exec_state *s, int index)
+{
+	unsigned long addr;
+	unsigned long gma_high, gma_low;
+	struct intel_vgpu *vgpu = s->vgpu;
+	int gmadr_bytes = vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+
+	if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8)) {
+		gvt_vgpu_err("invalid gma bytes %d\n", gmadr_bytes);
+		return INTEL_GVT_INVALID_ADDR;
+	}
+
+	gma_low = cmd_val(s, index) & BATCH_BUFFER_ADDR_MASK;
+	if (gmadr_bytes == 4) {
+		addr = gma_low;
+	} else {
+		gma_high = cmd_val(s, index + 1) & BATCH_BUFFER_ADDR_HIGH_MASK;
+		addr = (((unsigned long)gma_high) << 32) | gma_low;
+	}
+	return addr;
+}
+
+static inline int cmd_address_audit(struct parser_exec_state *s,
+		unsigned long guest_gma, int op_size, bool index_mode)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+	u32 max_surface_size = vgpu->gvt->device_info.max_surface_size;
+	int i;
+	int ret;
+
+	if (op_size > max_surface_size) {
+		gvt_vgpu_err("command address audit fail name %s\n",
+			s->info->name);
+		return -EFAULT;
+	}
+
+	if (index_mode)	{
+		if (guest_gma >= I915_GTT_PAGE_SIZE) {
+			ret = -EFAULT;
+			goto err;
+		}
+	} else if (!intel_gvt_ggtt_validate_range(vgpu, guest_gma, op_size)) {
+		ret = -EFAULT;
+		goto err;
+	}
+
+	return 0;
+
+err:
+	gvt_vgpu_err("cmd_parser: Malicious %s detected, addr=0x%lx, len=%d!\n",
+			s->info->name, guest_gma, op_size);
+
+	pr_err("cmd dump: ");
+	for (i = 0; i < cmd_length(s); i++) {
+		if (!(i % 4))
+			pr_err("\n%08x ", cmd_val(s, i));
+		else
+			pr_err("%08x ", cmd_val(s, i));
+	}
+	pr_err("\nvgpu%d: aperture 0x%llx - 0x%llx, hidden 0x%llx - 0x%llx\n",
+			vgpu->id,
+			vgpu_aperture_gmadr_base(vgpu),
+			vgpu_aperture_gmadr_end(vgpu),
+			vgpu_hidden_gmadr_base(vgpu),
+			vgpu_hidden_gmadr_end(vgpu));
+	return ret;
+}
+
+static int cmd_handler_mi_store_data_imm(struct parser_exec_state *s)
+{
+	int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	int op_size = (cmd_length(s) - 3) * sizeof(u32);
+	int core_id = (cmd_val(s, 2) & (1 << 0)) ? 1 : 0;
+	unsigned long gma, gma_low, gma_high;
+	u32 valid_len = CMD_LEN(2);
+	int ret = 0;
+
+	/* check ppggt */
+	if (!(cmd_val(s, 0) & (1 << 22)))
+		return 0;
+
+	/* check if QWORD */
+	if (DWORD_FIELD(0, 21, 21))
+		valid_len++;
+	ret = gvt_check_valid_cmd_length(cmd_length(s),
+			valid_len);
+	if (ret)
+		return ret;
+
+	gma = cmd_val(s, 2) & GENMASK(31, 2);
+
+	if (gmadr_bytes == 8) {
+		gma_low = cmd_val(s, 1) & GENMASK(31, 2);
+		gma_high = cmd_val(s, 2) & GENMASK(15, 0);
+		gma = (gma_high << 32) | gma_low;
+		core_id = (cmd_val(s, 1) & (1 << 0)) ? 1 : 0;
+	}
+	ret = cmd_address_audit(s, gma + op_size * core_id, op_size, false);
+	return ret;
+}
+
+static inline int unexpected_cmd(struct parser_exec_state *s)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+
+	gvt_vgpu_err("Unexpected %s in command buffer!\n", s->info->name);
+
+	return -EBADRQC;
+}
+
+static int cmd_handler_mi_semaphore_wait(struct parser_exec_state *s)
+{
+	return unexpected_cmd(s);
+}
+
+static int cmd_handler_mi_report_perf_count(struct parser_exec_state *s)
+{
+	return unexpected_cmd(s);
+}
+
+static int cmd_handler_mi_op_2e(struct parser_exec_state *s)
+{
+	return unexpected_cmd(s);
+}
+
+static int cmd_handler_mi_op_2f(struct parser_exec_state *s)
+{
+	int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	int op_size = (1 << ((cmd_val(s, 0) & GENMASK(20, 19)) >> 19)) *
+			sizeof(u32);
+	unsigned long gma, gma_high;
+	u32 valid_len = CMD_LEN(1);
+	int ret = 0;
+
+	if (!(cmd_val(s, 0) & (1 << 22)))
+		return ret;
+
+	/* check inline data */
+	if (cmd_val(s, 0) & BIT(18))
+		valid_len = CMD_LEN(9);
+	ret = gvt_check_valid_cmd_length(cmd_length(s),
+			valid_len);
+	if (ret)
+		return ret;
+
+	gma = cmd_val(s, 1) & GENMASK(31, 2);
+	if (gmadr_bytes == 8) {
+		gma_high = cmd_val(s, 2) & GENMASK(15, 0);
+		gma = (gma_high << 32) | gma;
+	}
+	ret = cmd_address_audit(s, gma, op_size, false);
+	return ret;
+}
+
+static int cmd_handler_mi_store_data_index(struct parser_exec_state *s)
+{
+	return unexpected_cmd(s);
+}
+
+static int cmd_handler_mi_clflush(struct parser_exec_state *s)
+{
+	return unexpected_cmd(s);
+}
+
+static int cmd_handler_mi_conditional_batch_buffer_end(
+		struct parser_exec_state *s)
+{
+	return unexpected_cmd(s);
+}
+
+static int cmd_handler_mi_update_gtt(struct parser_exec_state *s)
+{
+	return unexpected_cmd(s);
+}
+
+static int cmd_handler_mi_flush_dw(struct parser_exec_state *s)
+{
+	int gmadr_bytes = s->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	unsigned long gma;
+	bool index_mode = false;
+	int ret = 0;
+	u32 hws_pga, val;
+	u32 valid_len = CMD_LEN(2);
+
+	ret = gvt_check_valid_cmd_length(cmd_length(s),
+			valid_len);
+	if (ret) {
+		/* Check again for Qword */
+		ret = gvt_check_valid_cmd_length(cmd_length(s),
+			++valid_len);
+		return ret;
+	}
+
+	/* Check post-sync and ppgtt bit */
+	if (((cmd_val(s, 0) >> 14) & 0x3) && (cmd_val(s, 1) & (1 << 2))) {
+		gma = cmd_val(s, 1) & GENMASK(31, 3);
+		if (gmadr_bytes == 8)
+			gma |= (cmd_val(s, 2) & GENMASK(15, 0)) << 32;
+		/* Store Data Index */
+		if (cmd_val(s, 0) & (1 << 21))
+			index_mode = true;
+		ret = cmd_address_audit(s, gma, sizeof(u64), index_mode);
+		if (ret)
+			return ret;
+		if (index_mode) {
+			hws_pga = s->vgpu->hws_pga[s->engine->id];
+			gma = hws_pga + gma;
+			patch_value(s, cmd_ptr(s, 1), gma);
+			val = cmd_val(s, 0) & (~(1 << 21));
+			patch_value(s, cmd_ptr(s, 0), val);
+		}
+	}
+	/* Check notify bit */
+	if ((cmd_val(s, 0) & (1 << 8)))
+		set_bit(cmd_interrupt_events[s->engine->id].mi_flush_dw,
+			s->workload->pending_events);
+	return ret;
+}
+
+static void addr_type_update_snb(struct parser_exec_state *s)
+{
+	if ((s->buf_type == RING_BUFFER_INSTRUCTION) &&
+			(BATCH_BUFFER_ADR_SPACE_BIT(cmd_val(s, 0)) == 1)) {
+		s->buf_addr_type = PPGTT_BUFFER;
+	}
+}
+
+
+static int copy_gma_to_hva(struct intel_vgpu *vgpu, struct intel_vgpu_mm *mm,
+		unsigned long gma, unsigned long end_gma, void *va)
+{
+	unsigned long copy_len, offset;
+	unsigned long len = 0;
+	unsigned long gpa;
+
+	while (gma != end_gma) {
+		gpa = intel_vgpu_gma_to_gpa(mm, gma);
+		if (gpa == INTEL_GVT_INVALID_ADDR) {
+			gvt_vgpu_err("invalid gma address: %lx\n", gma);
+			return -EFAULT;
+		}
+
+		offset = gma & (I915_GTT_PAGE_SIZE - 1);
+
+		copy_len = (end_gma - gma) >= (I915_GTT_PAGE_SIZE - offset) ?
+			I915_GTT_PAGE_SIZE - offset : end_gma - gma;
+
+		intel_gvt_read_gpa(vgpu, gpa, va + len, copy_len);
+
+		len += copy_len;
+		gma += copy_len;
+	}
+	return len;
+}
+
+
+/*
+ * Check whether a batch buffer needs to be scanned. Currently
+ * the only criteria is based on privilege.
+ */
+static int batch_buffer_needs_scan(struct parser_exec_state *s)
+{
+	/* Decide privilege based on address space */
+	if (cmd_val(s, 0) & BIT(8) &&
+	    !(s->vgpu->scan_nonprivbb & s->engine->mask))
+		return 0;
+
+	return 1;
+}
+
+static const char *repr_addr_type(unsigned int type)
+{
+	return type == PPGTT_BUFFER ? "ppgtt" : "ggtt";
+}
+
+static int find_bb_size(struct parser_exec_state *s,
+			unsigned long *bb_size,
+			unsigned long *bb_end_cmd_offset)
+{
+	unsigned long gma = 0;
+	const struct cmd_info *info;
+	u32 cmd_len = 0;
+	bool bb_end = false;
+	struct intel_vgpu *vgpu = s->vgpu;
+	u32 cmd;
+	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
+		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
+
+	*bb_size = 0;
+	*bb_end_cmd_offset = 0;
+
+	/* get the start gm address of the batch buffer */
+	gma = get_gma_bb_from_cmd(s, 1);
+	if (gma == INTEL_GVT_INVALID_ADDR)
+		return -EFAULT;
+
+	cmd = cmd_val(s, 0);
+	info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
+	if (info == NULL) {
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+			     cmd, get_opcode(cmd, s->engine),
+			     repr_addr_type(s->buf_addr_type),
+			     s->engine->name, s->workload);
+		return -EBADRQC;
+	}
+	do {
+		if (copy_gma_to_hva(s->vgpu, mm,
+				    gma, gma + 4, &cmd) < 0)
+			return -EFAULT;
+		info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
+		if (info == NULL) {
+			gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+				     cmd, get_opcode(cmd, s->engine),
+				     repr_addr_type(s->buf_addr_type),
+				     s->engine->name, s->workload);
+			return -EBADRQC;
+		}
+
+		if (info->opcode == OP_MI_BATCH_BUFFER_END) {
+			bb_end = true;
+		} else if (info->opcode == OP_MI_BATCH_BUFFER_START) {
+			if (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0)
+				/* chained batch buffer */
+				bb_end = true;
+		}
+
+		if (bb_end)
+			*bb_end_cmd_offset = *bb_size;
+
+		cmd_len = get_cmd_length(info, cmd) << 2;
+		*bb_size += cmd_len;
+		gma += cmd_len;
+	} while (!bb_end);
+
+	return 0;
+}
+
+static int audit_bb_end(struct parser_exec_state *s, void *va)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+	u32 cmd = *(u32 *)va;
+	const struct cmd_info *info;
+
+	info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
+	if (info == NULL) {
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+			     cmd, get_opcode(cmd, s->engine),
+			     repr_addr_type(s->buf_addr_type),
+			     s->engine->name, s->workload);
+		return -EBADRQC;
+	}
+
+	if ((info->opcode == OP_MI_BATCH_BUFFER_END) ||
+	    ((info->opcode == OP_MI_BATCH_BUFFER_START) &&
+	     (BATCH_BUFFER_2ND_LEVEL_BIT(cmd) == 0)))
+		return 0;
+
+	return -EBADRQC;
+}
+
+static int perform_bb_shadow(struct parser_exec_state *s)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+	struct intel_vgpu_shadow_bb *bb;
+	unsigned long gma = 0;
+	unsigned long bb_size;
+	unsigned long bb_end_cmd_offset;
+	int ret = 0;
+	struct intel_vgpu_mm *mm = (s->buf_addr_type == GTT_BUFFER) ?
+		s->vgpu->gtt.ggtt_mm : s->workload->shadow_mm;
+	unsigned long start_offset = 0;
+
+	/* get the start gm address of the batch buffer */
+	gma = get_gma_bb_from_cmd(s, 1);
+	if (gma == INTEL_GVT_INVALID_ADDR)
+		return -EFAULT;
+
+	ret = find_bb_size(s, &bb_size, &bb_end_cmd_offset);
+	if (ret)
+		return ret;
+
+	bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+	if (!bb)
+		return -ENOMEM;
+
+	bb->ppgtt = (s->buf_addr_type == GTT_BUFFER) ? false : true;
+
+	/* the start_offset stores the batch buffer's start gma's
+	 * offset relative to page boundary. so for non-privileged batch
+	 * buffer, the shadowed gem object holds exactly the same page
+	 * layout as original gem object. This is for the convience of
+	 * replacing the whole non-privilged batch buffer page to this
+	 * shadowed one in PPGTT at the same gma address. (this replacing
+	 * action is not implemented yet now, but may be necessary in
+	 * future).
+	 * for prileged batch buffer, we just change start gma address to
+	 * that of shadowed page.
+	 */
+	if (bb->ppgtt)
+		start_offset = gma & ~I915_GTT_PAGE_MASK;
+
+	bb->obj = i915_gem_object_create_shmem(s->engine->i915,
+					       round_up(bb_size + start_offset,
+							PAGE_SIZE));
+	if (IS_ERR(bb->obj)) {
+		ret = PTR_ERR(bb->obj);
+		goto err_free_bb;
+	}
+
+	bb->va = i915_gem_object_pin_map(bb->obj, I915_MAP_WB);
+	if (IS_ERR(bb->va)) {
+		ret = PTR_ERR(bb->va);
+		goto err_free_obj;
+	}
+
+	ret = copy_gma_to_hva(s->vgpu, mm,
+			      gma, gma + bb_size,
+			      bb->va + start_offset);
+	if (ret < 0) {
+		gvt_vgpu_err("fail to copy guest ring buffer\n");
+		ret = -EFAULT;
+		goto err_unmap;
+	}
+
+	ret = audit_bb_end(s, bb->va + start_offset + bb_end_cmd_offset);
+	if (ret)
+		goto err_unmap;
+
+	i915_gem_object_unlock(bb->obj);
+	INIT_LIST_HEAD(&bb->list);
+	list_add(&bb->list, &s->workload->shadow_bb);
+
+	bb->bb_start_cmd_va = s->ip_va;
+
+	if ((s->buf_type == BATCH_BUFFER_INSTRUCTION) && (!s->is_ctx_wa))
+		bb->bb_offset = s->ip_va - s->rb_va;
+	else
+		bb->bb_offset = 0;
+
+	/*
+	 * ip_va saves the virtual address of the shadow batch buffer, while
+	 * ip_gma saves the graphics address of the original batch buffer.
+	 * As the shadow batch buffer is just a copy from the originial one,
+	 * it should be right to use shadow batch buffer'va and original batch
+	 * buffer's gma in pair. After all, we don't want to pin the shadow
+	 * buffer here (too early).
+	 */
+	s->ip_va = bb->va + start_offset;
+	s->ip_gma = gma;
+	return 0;
+err_unmap:
+	i915_gem_object_unpin_map(bb->obj);
+err_free_obj:
+	i915_gem_object_put(bb->obj);
+err_free_bb:
+	kfree(bb);
+	return ret;
+}
+
+static int cmd_handler_mi_batch_buffer_start(struct parser_exec_state *s)
+{
+	bool second_level;
+	int ret = 0;
+	struct intel_vgpu *vgpu = s->vgpu;
+
+	if (s->buf_type == BATCH_BUFFER_2ND_LEVEL) {
+		gvt_vgpu_err("Found MI_BATCH_BUFFER_START in 2nd level BB\n");
+		return -EFAULT;
+	}
+
+	second_level = BATCH_BUFFER_2ND_LEVEL_BIT(cmd_val(s, 0)) == 1;
+	if (second_level && (s->buf_type != BATCH_BUFFER_INSTRUCTION)) {
+		gvt_vgpu_err("Jumping to 2nd level BB from RB is not allowed\n");
+		return -EFAULT;
+	}
+
+	s->saved_buf_addr_type = s->buf_addr_type;
+	addr_type_update_snb(s);
+	if (s->buf_type == RING_BUFFER_INSTRUCTION) {
+		s->ret_ip_gma_ring = s->ip_gma + cmd_length(s) * sizeof(u32);
+		s->buf_type = BATCH_BUFFER_INSTRUCTION;
+	} else if (second_level) {
+		s->buf_type = BATCH_BUFFER_2ND_LEVEL;
+		s->ret_ip_gma_bb = s->ip_gma + cmd_length(s) * sizeof(u32);
+		s->ret_bb_va = s->ip_va + cmd_length(s) * sizeof(u32);
+	}
+
+	if (batch_buffer_needs_scan(s)) {
+		ret = perform_bb_shadow(s);
+		if (ret < 0)
+			gvt_vgpu_err("invalid shadow batch buffer\n");
+	} else {
+		/* emulate a batch buffer end to do return right */
+		ret = cmd_handler_mi_batch_buffer_end(s);
+		if (ret < 0)
+			return ret;
+	}
+	return ret;
+}
+
+static int mi_noop_index;
+
+static const struct cmd_info cmd_info[] = {
+	{"MI_NOOP", OP_MI_NOOP, F_LEN_CONST, R_ALL, D_ALL, 0, 1, NULL},
+
+	{"MI_SET_PREDICATE", OP_MI_SET_PREDICATE, F_LEN_CONST, R_ALL, D_ALL,
+		0, 1, NULL},
+
+	{"MI_USER_INTERRUPT", OP_MI_USER_INTERRUPT, F_LEN_CONST, R_ALL, D_ALL,
+		0, 1, cmd_handler_mi_user_interrupt},
+
+	{"MI_WAIT_FOR_EVENT", OP_MI_WAIT_FOR_EVENT, F_LEN_CONST, R_RCS | R_BCS,
+		D_ALL, 0, 1, cmd_handler_mi_wait_for_event},
+
+	{"MI_FLUSH", OP_MI_FLUSH, F_LEN_CONST, R_ALL, D_ALL, 0, 1, NULL},
+
+	{"MI_ARB_CHECK", OP_MI_ARB_CHECK, F_LEN_CONST, R_ALL, D_ALL, 0, 1,
+		NULL},
+
+	{"MI_RS_CONTROL", OP_MI_RS_CONTROL, F_LEN_CONST, R_RCS, D_ALL, 0, 1,
+		NULL},
+
+	{"MI_REPORT_HEAD", OP_MI_REPORT_HEAD, F_LEN_CONST, R_ALL, D_ALL, 0, 1,
+		NULL},
+
+	{"MI_ARB_ON_OFF", OP_MI_ARB_ON_OFF, F_LEN_CONST, R_ALL, D_ALL, 0, 1,
+		NULL},
+
+	{"MI_URB_ATOMIC_ALLOC", OP_MI_URB_ATOMIC_ALLOC, F_LEN_CONST, R_RCS,
+		D_ALL, 0, 1, NULL},
+
+	{"MI_BATCH_BUFFER_END", OP_MI_BATCH_BUFFER_END,
+		F_IP_ADVANCE_CUSTOM | F_LEN_CONST, R_ALL, D_ALL, 0, 1,
+		cmd_handler_mi_batch_buffer_end},
+
+	{"MI_SUSPEND_FLUSH", OP_MI_SUSPEND_FLUSH, F_LEN_CONST, R_ALL, D_ALL,
+		0, 1, NULL},
+
+	{"MI_PREDICATE", OP_MI_PREDICATE, F_LEN_CONST, R_RCS, D_ALL, 0, 1,
+		NULL},
+
+	{"MI_TOPOLOGY_FILTER", OP_MI_TOPOLOGY_FILTER, F_LEN_CONST, R_ALL,
+		D_ALL, 0, 1, NULL},
+
+	{"MI_SET_APPID", OP_MI_SET_APPID, F_LEN_CONST, R_ALL, D_ALL, 0, 1,
+		NULL},
+
+	{"MI_RS_CONTEXT", OP_MI_RS_CONTEXT, F_LEN_CONST, R_RCS, D_ALL, 0, 1,
+		NULL},
+
+	{"MI_DISPLAY_FLIP", OP_MI_DISPLAY_FLIP, F_LEN_VAR,
+		R_RCS | R_BCS, D_ALL, 0, 8, cmd_handler_mi_display_flip},
+
+	{"MI_SEMAPHORE_MBOX", OP_MI_SEMAPHORE_MBOX, F_LEN_VAR | F_LEN_VAR_FIXED,
+		R_ALL, D_ALL, 0, 8, NULL, CMD_LEN(1)},
+
+	{"MI_MATH", OP_MI_MATH, F_LEN_VAR, R_ALL, D_ALL, 0, 8, NULL},
+
+	{"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS,
+		D_ALL, 0, 8, NULL, CMD_LEN(0)},
+
+	{"MI_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, 0, 8,
+		NULL, CMD_LEN(0)},
+
+	{"MI_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS, ADDR_FIX_1(2),
+		8, cmd_handler_mi_semaphore_wait, CMD_LEN(2)},
+
+	{"MI_STORE_DATA_IMM", OP_MI_STORE_DATA_IMM, F_LEN_VAR, R_ALL, D_BDW_PLUS,
+		ADDR_FIX_1(1), 10, cmd_handler_mi_store_data_imm},
+
+	{"MI_STORE_DATA_INDEX", OP_MI_STORE_DATA_INDEX, F_LEN_VAR, R_ALL, D_ALL,
+		0, 8, cmd_handler_mi_store_data_index},
+
+	{"MI_LOAD_REGISTER_IMM", OP_MI_LOAD_REGISTER_IMM, F_LEN_VAR, R_ALL,
+		D_ALL, 0, 8, cmd_handler_lri},
+
+	{"MI_UPDATE_GTT", OP_MI_UPDATE_GTT, F_LEN_VAR, R_ALL, D_BDW_PLUS, 0, 10,
+		cmd_handler_mi_update_gtt},
+
+	{"MI_STORE_REGISTER_MEM", OP_MI_STORE_REGISTER_MEM,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+		cmd_handler_srm, CMD_LEN(2)},
+
+	{"MI_FLUSH_DW", OP_MI_FLUSH_DW, F_LEN_VAR, R_ALL, D_ALL, 0, 6,
+		cmd_handler_mi_flush_dw},
+
+	{"MI_CLFLUSH", OP_MI_CLFLUSH, F_LEN_VAR, R_ALL, D_ALL, ADDR_FIX_1(1),
+		10, cmd_handler_mi_clflush},
+
+	{"MI_REPORT_PERF_COUNT", OP_MI_REPORT_PERF_COUNT,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(1), 6,
+		cmd_handler_mi_report_perf_count, CMD_LEN(2)},
+
+	{"MI_LOAD_REGISTER_MEM", OP_MI_LOAD_REGISTER_MEM,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+		cmd_handler_lrm, CMD_LEN(2)},
+
+	{"MI_LOAD_REGISTER_REG", OP_MI_LOAD_REGISTER_REG,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, 0, 8,
+		cmd_handler_lrr, CMD_LEN(1)},
+
+	{"MI_RS_STORE_DATA_IMM", OP_MI_RS_STORE_DATA_IMM,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_RCS, D_ALL, 0,
+		8, NULL, CMD_LEN(2)},
+
+	{"MI_LOAD_URB_MEM", OP_MI_LOAD_URB_MEM, F_LEN_VAR | F_LEN_VAR_FIXED,
+		R_RCS, D_ALL, ADDR_FIX_1(2), 8, NULL, CMD_LEN(2)},
+
+	{"MI_STORE_URM_MEM", OP_MI_STORE_URM_MEM, F_LEN_VAR, R_RCS, D_ALL,
+		ADDR_FIX_1(2), 8, NULL},
+
+	{"MI_OP_2E", OP_MI_2E, F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_BDW_PLUS,
+		ADDR_FIX_2(1, 2), 8, cmd_handler_mi_op_2e, CMD_LEN(3)},
+
+	{"MI_OP_2F", OP_MI_2F, F_LEN_VAR, R_ALL, D_BDW_PLUS, ADDR_FIX_1(1),
+		8, cmd_handler_mi_op_2f},
+
+	{"MI_BATCH_BUFFER_START", OP_MI_BATCH_BUFFER_START,
+		F_IP_ADVANCE_CUSTOM, R_ALL, D_ALL, 0, 8,
+		cmd_handler_mi_batch_buffer_start},
+
+	{"MI_CONDITIONAL_BATCH_BUFFER_END", OP_MI_CONDITIONAL_BATCH_BUFFER_END,
+		F_LEN_VAR | F_LEN_VAR_FIXED, R_ALL, D_ALL, ADDR_FIX_1(2), 8,
+		cmd_handler_mi_conditional_batch_buffer_end, CMD_LEN(2)},
+
+	{"MI_LOAD_SCAN_LINES_INCL", OP_MI_LOAD_SCAN_LINES_INCL, F_LEN_CONST,
+		R_RCS | R_BCS, D_ALL, 0, 2, NULL},
+
+	{"XY_SETUP_BLT", OP_XY_SETUP_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		ADDR_FIX_2(4, 7), 8, NULL},
+
+	{"XY_SETUP_CLIP_BLT", OP_XY_SETUP_CLIP_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		0, 8, NULL},
+
+	{"XY_SETUP_MONO_PATTERN_SL_BLT", OP_XY_SETUP_MONO_PATTERN_SL_BLT,
+		F_LEN_VAR, R_BCS, D_ALL, ADDR_FIX_1(4), 8, NULL},
+
+	{"XY_PIXEL_BLT", OP_XY_PIXEL_BLT, F_LEN_VAR, R_BCS, D_ALL, 0, 8, NULL},
+
+	{"XY_SCANLINES_BLT", OP_XY_SCANLINES_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		0, 8, NULL},
+
+	{"XY_TEXT_BLT", OP_XY_TEXT_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		ADDR_FIX_1(3), 8, NULL},
+
+	{"XY_TEXT_IMMEDIATE_BLT", OP_XY_TEXT_IMMEDIATE_BLT, F_LEN_VAR, R_BCS,
+		D_ALL, 0, 8, NULL},
+
+	{"XY_COLOR_BLT", OP_XY_COLOR_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		ADDR_FIX_1(4), 8, NULL},
+
+	{"XY_PAT_BLT", OP_XY_PAT_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		ADDR_FIX_2(4, 5), 8, NULL},
+
+	{"XY_MONO_PAT_BLT", OP_XY_MONO_PAT_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		ADDR_FIX_1(4), 8, NULL},
+
+	{"XY_SRC_COPY_BLT", OP_XY_SRC_COPY_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		ADDR_FIX_2(4, 7), 8, NULL},
+
+	{"XY_MONO_SRC_COPY_BLT", OP_XY_MONO_SRC_COPY_BLT, F_LEN_VAR, R_BCS,
+		D_ALL, ADDR_FIX_2(4, 5), 8, NULL},
+
+	{"XY_FULL_BLT", OP_XY_FULL_BLT, F_LEN_VAR, R_BCS, D_ALL, 0, 8, NULL},
+
+	{"XY_FULL_MONO_SRC_BLT", OP_XY_FULL_MONO_SRC_BLT, F_LEN_VAR, R_BCS,
+		D_ALL, ADDR_FIX_3(4, 5, 8), 8, NULL},
+
+	{"XY_FULL_MONO_PATTERN_BLT", OP_XY_FULL_MONO_PATTERN_BLT, F_LEN_VAR,
+		R_BCS, D_ALL, ADDR_FIX_2(4, 7), 8, NULL},
+
+	{"XY_FULL_MONO_PATTERN_MONO_SRC_BLT",
+		OP_XY_FULL_MONO_PATTERN_MONO_SRC_BLT,
+		F_LEN_VAR, R_BCS, D_ALL, ADDR_FIX_2(4, 5), 8, NULL},
+
+	{"XY_MONO_PAT_FIXED_BLT", OP_XY_MONO_PAT_FIXED_BLT, F_LEN_VAR, R_BCS,
+		D_ALL, ADDR_FIX_1(4), 8, NULL},
+
+	{"XY_MONO_SRC_COPY_IMMEDIATE_BLT", OP_XY_MONO_SRC_COPY_IMMEDIATE_BLT,
+		F_LEN_VAR, R_BCS, D_ALL, ADDR_FIX_1(4), 8, NULL},
+
+	{"XY_PAT_BLT_IMMEDIATE", OP_XY_PAT_BLT_IMMEDIATE, F_LEN_VAR, R_BCS,
+		D_ALL, ADDR_FIX_1(4), 8, NULL},
+
+	{"XY_SRC_COPY_CHROMA_BLT", OP_XY_SRC_COPY_CHROMA_BLT, F_LEN_VAR, R_BCS,
+		D_ALL, ADDR_FIX_2(4, 7), 8, NULL},
+
+	{"XY_FULL_IMMEDIATE_PATTERN_BLT", OP_XY_FULL_IMMEDIATE_PATTERN_BLT,
+		F_LEN_VAR, R_BCS, D_ALL, ADDR_FIX_2(4, 7), 8, NULL},
+
+	{"XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT",
+		OP_XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT,
+		F_LEN_VAR, R_BCS, D_ALL, ADDR_FIX_2(4, 5), 8, NULL},
+
+	{"XY_PAT_CHROMA_BLT", OP_XY_PAT_CHROMA_BLT, F_LEN_VAR, R_BCS, D_ALL,
+		ADDR_FIX_2(4, 5), 8, NULL},
+
+	{"XY_PAT_CHROMA_BLT_IMMEDIATE", OP_XY_PAT_CHROMA_BLT_IMMEDIATE,
+		F_LEN_VAR, R_BCS, D_ALL, ADDR_FIX_1(4), 8, NULL},
+
+	{"3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP",
+		OP_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_VIEWPORT_STATE_POINTERS_CC",
+		OP_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_BLEND_STATE_POINTERS",
+		OP_3DSTATE_BLEND_STATE_POINTERS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DEPTH_STENCIL_STATE_POINTERS",
+		OP_3DSTATE_DEPTH_STENCIL_STATE_POINTERS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_BINDING_TABLE_POINTERS_VS",
+		OP_3DSTATE_BINDING_TABLE_POINTERS_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_BINDING_TABLE_POINTERS_HS",
+		OP_3DSTATE_BINDING_TABLE_POINTERS_HS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_BINDING_TABLE_POINTERS_DS",
+		OP_3DSTATE_BINDING_TABLE_POINTERS_DS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_BINDING_TABLE_POINTERS_GS",
+		OP_3DSTATE_BINDING_TABLE_POINTERS_GS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_BINDING_TABLE_POINTERS_PS",
+		OP_3DSTATE_BINDING_TABLE_POINTERS_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SAMPLER_STATE_POINTERS_VS",
+		OP_3DSTATE_SAMPLER_STATE_POINTERS_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SAMPLER_STATE_POINTERS_HS",
+		OP_3DSTATE_SAMPLER_STATE_POINTERS_HS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SAMPLER_STATE_POINTERS_DS",
+		OP_3DSTATE_SAMPLER_STATE_POINTERS_DS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SAMPLER_STATE_POINTERS_GS",
+		OP_3DSTATE_SAMPLER_STATE_POINTERS_GS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SAMPLER_STATE_POINTERS_PS",
+		OP_3DSTATE_SAMPLER_STATE_POINTERS_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_URB_VS", OP_3DSTATE_URB_VS, F_LEN_VAR, R_RCS, D_ALL,
+		0, 8, NULL},
+
+	{"3DSTATE_URB_HS", OP_3DSTATE_URB_HS, F_LEN_VAR, R_RCS, D_ALL,
+		0, 8, NULL},
+
+	{"3DSTATE_URB_DS", OP_3DSTATE_URB_DS, F_LEN_VAR, R_RCS, D_ALL,
+		0, 8, NULL},
+
+	{"3DSTATE_URB_GS", OP_3DSTATE_URB_GS, F_LEN_VAR, R_RCS, D_ALL,
+		0, 8, NULL},
+
+	{"3DSTATE_GATHER_CONSTANT_VS", OP_3DSTATE_GATHER_CONSTANT_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_GATHER_CONSTANT_GS", OP_3DSTATE_GATHER_CONSTANT_GS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_GATHER_CONSTANT_HS", OP_3DSTATE_GATHER_CONSTANT_HS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_GATHER_CONSTANT_DS", OP_3DSTATE_GATHER_CONSTANT_DS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_GATHER_CONSTANT_PS", OP_3DSTATE_GATHER_CONSTANT_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DX9_CONSTANTF_VS", OP_3DSTATE_DX9_CONSTANTF_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 11, NULL},
+
+	{"3DSTATE_DX9_CONSTANTF_PS", OP_3DSTATE_DX9_CONSTANTF_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 11, NULL},
+
+	{"3DSTATE_DX9_CONSTANTI_VS", OP_3DSTATE_DX9_CONSTANTI_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DX9_CONSTANTI_PS", OP_3DSTATE_DX9_CONSTANTI_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DX9_CONSTANTB_VS", OP_3DSTATE_DX9_CONSTANTB_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DX9_CONSTANTB_PS", OP_3DSTATE_DX9_CONSTANTB_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DX9_LOCAL_VALID_VS", OP_3DSTATE_DX9_LOCAL_VALID_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DX9_LOCAL_VALID_PS", OP_3DSTATE_DX9_LOCAL_VALID_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DX9_GENERATE_ACTIVE_VS", OP_3DSTATE_DX9_GENERATE_ACTIVE_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DX9_GENERATE_ACTIVE_PS", OP_3DSTATE_DX9_GENERATE_ACTIVE_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_BINDING_TABLE_EDIT_VS", OP_3DSTATE_BINDING_TABLE_EDIT_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 9, NULL},
+
+	{"3DSTATE_BINDING_TABLE_EDIT_GS", OP_3DSTATE_BINDING_TABLE_EDIT_GS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 9, NULL},
+
+	{"3DSTATE_BINDING_TABLE_EDIT_HS", OP_3DSTATE_BINDING_TABLE_EDIT_HS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 9, NULL},
+
+	{"3DSTATE_BINDING_TABLE_EDIT_DS", OP_3DSTATE_BINDING_TABLE_EDIT_DS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 9, NULL},
+
+	{"3DSTATE_BINDING_TABLE_EDIT_PS", OP_3DSTATE_BINDING_TABLE_EDIT_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 9, NULL},
+
+	{"3DSTATE_VF_INSTANCING", OP_3DSTATE_VF_INSTANCING, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_VF_SGVS", OP_3DSTATE_VF_SGVS, F_LEN_VAR, R_RCS, D_BDW_PLUS, 0, 8,
+		NULL},
+
+	{"3DSTATE_VF_TOPOLOGY", OP_3DSTATE_VF_TOPOLOGY, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_WM_CHROMAKEY", OP_3DSTATE_WM_CHROMAKEY, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_PS_BLEND", OP_3DSTATE_PS_BLEND, F_LEN_VAR, R_RCS, D_BDW_PLUS, 0,
+		8, NULL},
+
+	{"3DSTATE_WM_DEPTH_STENCIL", OP_3DSTATE_WM_DEPTH_STENCIL, F_LEN_VAR,
+		R_RCS, D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_PS_EXTRA", OP_3DSTATE_PS_EXTRA, F_LEN_VAR, R_RCS, D_BDW_PLUS, 0,
+		8, NULL},
+
+	{"3DSTATE_RASTER", OP_3DSTATE_RASTER, F_LEN_VAR, R_RCS, D_BDW_PLUS, 0, 8,
+		NULL},
+
+	{"3DSTATE_SBE_SWIZ", OP_3DSTATE_SBE_SWIZ, F_LEN_VAR, R_RCS, D_BDW_PLUS, 0, 8,
+		NULL},
+
+	{"3DSTATE_WM_HZ_OP", OP_3DSTATE_WM_HZ_OP, F_LEN_VAR, R_RCS, D_BDW_PLUS, 0, 8,
+		NULL},
+
+	{"3DSTATE_VERTEX_BUFFERS", OP_3DSTATE_VERTEX_BUFFERS, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_VERTEX_ELEMENTS", OP_3DSTATE_VERTEX_ELEMENTS, F_LEN_VAR,
+		R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_INDEX_BUFFER", OP_3DSTATE_INDEX_BUFFER, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, ADDR_FIX_1(2), 8, NULL},
+
+	{"3DSTATE_VF_STATISTICS", OP_3DSTATE_VF_STATISTICS, F_LEN_CONST,
+		R_RCS, D_ALL, 0, 1, NULL},
+
+	{"3DSTATE_VF", OP_3DSTATE_VF, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_CC_STATE_POINTERS", OP_3DSTATE_CC_STATE_POINTERS, F_LEN_VAR,
+		R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SCISSOR_STATE_POINTERS", OP_3DSTATE_SCISSOR_STATE_POINTERS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_GS", OP_3DSTATE_GS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_CLIP", OP_3DSTATE_CLIP, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_WM", OP_3DSTATE_WM, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_CONSTANT_GS", OP_3DSTATE_CONSTANT_GS, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_CONSTANT_PS", OP_3DSTATE_CONSTANT_PS, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_SAMPLE_MASK", OP_3DSTATE_SAMPLE_MASK, F_LEN_VAR, R_RCS,
+		D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_CONSTANT_HS", OP_3DSTATE_CONSTANT_HS, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_CONSTANT_DS", OP_3DSTATE_CONSTANT_DS, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_HS", OP_3DSTATE_HS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_TE", OP_3DSTATE_TE, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DS", OP_3DSTATE_DS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_STREAMOUT", OP_3DSTATE_STREAMOUT, F_LEN_VAR, R_RCS,
+		D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SBE", OP_3DSTATE_SBE, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_PS", OP_3DSTATE_PS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_DRAWING_RECTANGLE", OP_3DSTATE_DRAWING_RECTANGLE, F_LEN_VAR,
+		R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SAMPLER_PALETTE_LOAD0", OP_3DSTATE_SAMPLER_PALETTE_LOAD0,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_CHROMA_KEY", OP_3DSTATE_CHROMA_KEY, F_LEN_VAR, R_RCS, D_ALL,
+		0, 8, NULL},
+
+	{"3DSTATE_DEPTH_BUFFER", OP_3DSTATE_DEPTH_BUFFER, F_LEN_VAR, R_RCS,
+		D_ALL, ADDR_FIX_1(2), 8, NULL},
+
+	{"3DSTATE_POLY_STIPPLE_OFFSET", OP_3DSTATE_POLY_STIPPLE_OFFSET,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_POLY_STIPPLE_PATTERN", OP_3DSTATE_POLY_STIPPLE_PATTERN,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_LINE_STIPPLE", OP_3DSTATE_LINE_STIPPLE, F_LEN_VAR, R_RCS,
+		D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_AA_LINE_PARAMS", OP_3DSTATE_AA_LINE_PARAMS, F_LEN_VAR, R_RCS,
+		D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_GS_SVB_INDEX", OP_3DSTATE_GS_SVB_INDEX, F_LEN_VAR, R_RCS,
+		D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SAMPLER_PALETTE_LOAD1", OP_3DSTATE_SAMPLER_PALETTE_LOAD1,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_MULTISAMPLE", OP_3DSTATE_MULTISAMPLE_BDW, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"3DSTATE_STENCIL_BUFFER", OP_3DSTATE_STENCIL_BUFFER, F_LEN_VAR, R_RCS,
+		D_ALL, ADDR_FIX_1(2), 8, NULL},
+
+	{"3DSTATE_HIER_DEPTH_BUFFER", OP_3DSTATE_HIER_DEPTH_BUFFER, F_LEN_VAR,
+		R_RCS, D_ALL, ADDR_FIX_1(2), 8, NULL},
+
+	{"3DSTATE_CLEAR_PARAMS", OP_3DSTATE_CLEAR_PARAMS, F_LEN_VAR,
+		R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_PUSH_CONSTANT_ALLOC_VS", OP_3DSTATE_PUSH_CONSTANT_ALLOC_VS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_PUSH_CONSTANT_ALLOC_HS", OP_3DSTATE_PUSH_CONSTANT_ALLOC_HS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_PUSH_CONSTANT_ALLOC_DS", OP_3DSTATE_PUSH_CONSTANT_ALLOC_DS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_PUSH_CONSTANT_ALLOC_GS", OP_3DSTATE_PUSH_CONSTANT_ALLOC_GS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_PUSH_CONSTANT_ALLOC_PS", OP_3DSTATE_PUSH_CONSTANT_ALLOC_PS,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_MONOFILTER_SIZE", OP_3DSTATE_MONOFILTER_SIZE, F_LEN_VAR,
+		R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SO_DECL_LIST", OP_3DSTATE_SO_DECL_LIST, F_LEN_VAR, R_RCS,
+		D_ALL, 0, 9, NULL},
+
+	{"3DSTATE_SO_BUFFER", OP_3DSTATE_SO_BUFFER, F_LEN_VAR, R_RCS, D_BDW_PLUS,
+		ADDR_FIX_2(2, 4), 8, NULL},
+
+	{"3DSTATE_BINDING_TABLE_POOL_ALLOC",
+		OP_3DSTATE_BINDING_TABLE_POOL_ALLOC,
+		F_LEN_VAR, R_RCS, D_BDW_PLUS, ADDR_FIX_1(1), 8, NULL},
+
+	{"3DSTATE_GATHER_POOL_ALLOC", OP_3DSTATE_GATHER_POOL_ALLOC,
+		F_LEN_VAR, R_RCS, D_BDW_PLUS, ADDR_FIX_1(1), 8, NULL},
+
+	{"3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC",
+		OP_3DSTATE_DX9_CONSTANT_BUFFER_POOL_ALLOC,
+		F_LEN_VAR, R_RCS, D_BDW_PLUS, ADDR_FIX_1(1), 8, NULL},
+
+	{"3DSTATE_SAMPLE_PATTERN", OP_3DSTATE_SAMPLE_PATTERN, F_LEN_VAR, R_RCS,
+		D_BDW_PLUS, 0, 8, NULL},
+
+	{"PIPE_CONTROL", OP_PIPE_CONTROL, F_LEN_VAR, R_RCS, D_ALL,
+		ADDR_FIX_1(2), 8, cmd_handler_pipe_control},
+
+	{"3DPRIMITIVE", OP_3DPRIMITIVE, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"PIPELINE_SELECT", OP_PIPELINE_SELECT, F_LEN_CONST, R_RCS, D_ALL, 0,
+		1, NULL},
+
+	{"STATE_PREFETCH", OP_STATE_PREFETCH, F_LEN_VAR, R_RCS, D_ALL,
+		ADDR_FIX_1(1), 8, NULL},
+
+	{"STATE_SIP", OP_STATE_SIP, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"STATE_BASE_ADDRESS", OP_STATE_BASE_ADDRESS, F_LEN_VAR, R_RCS, D_BDW_PLUS,
+		ADDR_FIX_5(1, 3, 4, 5, 6), 8, NULL},
+
+	{"OP_3D_MEDIA_0_1_4", OP_3D_MEDIA_0_1_4, F_LEN_VAR, R_RCS, D_ALL,
+		ADDR_FIX_1(1), 8, NULL},
+
+	{"OP_SWTESS_BASE_ADDRESS", OP_SWTESS_BASE_ADDRESS,
+		F_LEN_VAR, R_RCS, D_ALL, ADDR_FIX_2(1, 2), 3, NULL},
+
+	{"3DSTATE_VS", OP_3DSTATE_VS, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_SF", OP_3DSTATE_SF, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
+
+	{"3DSTATE_CONSTANT_VS", OP_3DSTATE_CONSTANT_VS, F_LEN_VAR, R_RCS, D_BDW_PLUS,
+		0, 8, NULL},
+
+	{"3DSTATE_COMPONENT_PACKING", OP_3DSTATE_COMPONENT_PACKING, F_LEN_VAR, R_RCS,
+		D_SKL_PLUS, 0, 8, NULL},
+
+	{"MEDIA_INTERFACE_DESCRIPTOR_LOAD", OP_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
+		F_LEN_VAR, R_RCS, D_ALL, 0, 16, NULL},
+
+	{"MEDIA_GATEWAY_STATE", OP_MEDIA_GATEWAY_STATE, F_LEN_VAR, R_RCS, D_ALL,
+		0, 16, NULL},
+
+	{"MEDIA_STATE_FLUSH", OP_MEDIA_STATE_FLUSH, F_LEN_VAR, R_RCS, D_ALL,
+		0, 16, NULL},
+
+	{"MEDIA_POOL_STATE", OP_MEDIA_POOL_STATE, F_LEN_VAR, R_RCS, D_ALL,
+		0, 16, NULL},
+
+	{"MEDIA_OBJECT", OP_MEDIA_OBJECT, F_LEN_VAR, R_RCS, D_ALL, 0, 16, NULL},
+
+	{"MEDIA_CURBE_LOAD", OP_MEDIA_CURBE_LOAD, F_LEN_VAR, R_RCS, D_ALL,
+		0, 16, NULL},
+
+	{"MEDIA_OBJECT_PRT", OP_MEDIA_OBJECT_PRT, F_LEN_VAR, R_RCS, D_ALL,
+		0, 16, NULL},
+
+	{"MEDIA_OBJECT_WALKER", OP_MEDIA_OBJECT_WALKER, F_LEN_VAR, R_RCS, D_ALL,
+		0, 16, NULL},
+
+	{"GPGPU_WALKER", OP_GPGPU_WALKER, F_LEN_VAR, R_RCS, D_ALL,
+		0, 8, NULL},
+
+	{"MEDIA_VFE_STATE", OP_MEDIA_VFE_STATE, F_LEN_VAR, R_RCS, D_ALL, 0, 16,
+		NULL},
+
+	{"3DSTATE_VF_STATISTICS_GM45", OP_3DSTATE_VF_STATISTICS_GM45,
+		F_LEN_CONST, R_ALL, D_ALL, 0, 1, NULL},
+
+	{"MFX_PIPE_MODE_SELECT", OP_MFX_PIPE_MODE_SELECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_SURFACE_STATE", OP_MFX_SURFACE_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_PIPE_BUF_ADDR_STATE", OP_MFX_PIPE_BUF_ADDR_STATE, F_LEN_VAR,
+		R_VCS, D_BDW_PLUS, 0, 12, NULL},
+
+	{"MFX_IND_OBJ_BASE_ADDR_STATE", OP_MFX_IND_OBJ_BASE_ADDR_STATE,
+		F_LEN_VAR, R_VCS, D_BDW_PLUS, 0, 12, NULL},
+
+	{"MFX_BSP_BUF_BASE_ADDR_STATE", OP_MFX_BSP_BUF_BASE_ADDR_STATE,
+		F_LEN_VAR, R_VCS, D_BDW_PLUS, ADDR_FIX_3(1, 3, 5), 12, NULL},
+
+	{"OP_2_0_0_5", OP_2_0_0_5, F_LEN_VAR, R_VCS, D_BDW_PLUS, 0, 12, NULL},
+
+	{"MFX_STATE_POINTER", OP_MFX_STATE_POINTER, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_QM_STATE", OP_MFX_QM_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_FQM_STATE", OP_MFX_FQM_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_PAK_INSERT_OBJECT", OP_MFX_PAK_INSERT_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_STITCH_OBJECT", OP_MFX_STITCH_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_IT_OBJECT", OP_MFD_IT_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_WAIT", OP_MFX_WAIT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 6, NULL},
+
+	{"MFX_AVC_IMG_STATE", OP_MFX_AVC_IMG_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_AVC_QM_STATE", OP_MFX_AVC_QM_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_AVC_DIRECTMODE_STATE", OP_MFX_AVC_DIRECTMODE_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_AVC_SLICE_STATE", OP_MFX_AVC_SLICE_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_AVC_REF_IDX_STATE", OP_MFX_AVC_REF_IDX_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_AVC_WEIGHTOFFSET_STATE", OP_MFX_AVC_WEIGHTOFFSET_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_AVC_PICID_STATE", OP_MFD_AVC_PICID_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+	{"MFD_AVC_DPB_STATE", OP_MFD_AVC_DPB_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_AVC_BSD_OBJECT", OP_MFD_AVC_BSD_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_AVC_SLICEADDR", OP_MFD_AVC_SLICEADDR, F_LEN_VAR,
+		R_VCS, D_ALL, ADDR_FIX_1(2), 12, NULL},
+
+	{"MFC_AVC_PAK_OBJECT", OP_MFC_AVC_PAK_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_VC1_PRED_PIPE_STATE", OP_MFX_VC1_PRED_PIPE_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_VC1_DIRECTMODE_STATE", OP_MFX_VC1_DIRECTMODE_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_VC1_SHORT_PIC_STATE", OP_MFD_VC1_SHORT_PIC_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_VC1_LONG_PIC_STATE", OP_MFD_VC1_LONG_PIC_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_VC1_BSD_OBJECT", OP_MFD_VC1_BSD_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFC_MPEG2_SLICEGROUP_STATE", OP_MFC_MPEG2_SLICEGROUP_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFC_MPEG2_PAK_OBJECT", OP_MFC_MPEG2_PAK_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_MPEG2_PIC_STATE", OP_MFX_MPEG2_PIC_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_MPEG2_QM_STATE", OP_MFX_MPEG2_QM_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_MPEG2_BSD_OBJECT", OP_MFD_MPEG2_BSD_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_2_6_0_0", OP_MFX_2_6_0_0, F_LEN_VAR, R_VCS, D_ALL,
+		0, 16, NULL},
+
+	{"MFX_2_6_0_9", OP_MFX_2_6_0_9, F_LEN_VAR, R_VCS, D_ALL, 0, 16, NULL},
+
+	{"MFX_2_6_0_8", OP_MFX_2_6_0_8, F_LEN_VAR, R_VCS, D_ALL, 0, 16, NULL},
+
+	{"MFX_JPEG_PIC_STATE", OP_MFX_JPEG_PIC_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFX_JPEG_HUFF_TABLE_STATE", OP_MFX_JPEG_HUFF_TABLE_STATE, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"MFD_JPEG_BSD_OBJECT", OP_MFD_JPEG_BSD_OBJECT, F_LEN_VAR,
+		R_VCS, D_ALL, 0, 12, NULL},
+
+	{"VEBOX_STATE", OP_VEB_STATE, F_LEN_VAR, R_VECS, D_ALL, 0, 12, NULL},
+
+	{"VEBOX_SURFACE_STATE", OP_VEB_SURFACE_STATE, F_LEN_VAR, R_VECS, D_ALL,
+		0, 12, NULL},
+
+	{"VEB_DI_IECP", OP_VEB_DNDI_IECP_STATE, F_LEN_VAR, R_VECS, D_BDW_PLUS,
+		0, 12, NULL},
+};
+
+static void add_cmd_entry(struct intel_gvt *gvt, struct cmd_entry *e)
+{
+	hash_add(gvt->cmd_table, &e->hlist, e->info->opcode);
+}
+
+/* call the cmd handler, and advance ip */
+static int cmd_parser_exec(struct parser_exec_state *s)
+{
+	struct intel_vgpu *vgpu = s->vgpu;
+	const struct cmd_info *info;
+	u32 cmd;
+	int ret = 0;
+
+	cmd = cmd_val(s, 0);
+
+	/* fastpath for MI_NOOP */
+	if (cmd == MI_NOOP)
+		info = &cmd_info[mi_noop_index];
+	else
+		info = get_cmd_info(s->vgpu->gvt, cmd, s->engine);
+
+	if (info == NULL) {
+		gvt_vgpu_err("unknown cmd 0x%x, opcode=0x%x, addr_type=%s, ring %s, workload=%p\n",
+			     cmd, get_opcode(cmd, s->engine),
+			     repr_addr_type(s->buf_addr_type),
+			     s->engine->name, s->workload);
+		return -EBADRQC;
+	}
+
+	s->info = info;
+
+	trace_gvt_command(vgpu->id, s->engine->id, s->ip_gma, s->ip_va,
+			  cmd_length(s), s->buf_type, s->buf_addr_type,
+			  s->workload, info->name);
+
+	if ((info->flag & F_LEN_MASK) == F_LEN_VAR_FIXED) {
+		ret = gvt_check_valid_cmd_length(cmd_length(s),
+						 info->valid_len);
+		if (ret)
+			return ret;
+	}
+
+	if (info->handler) {
+		ret = info->handler(s);
+		if (ret < 0) {
+			gvt_vgpu_err("%s handler error\n", info->name);
+			return ret;
+		}
+	}
+
+	if (!(info->flag & F_IP_ADVANCE_CUSTOM)) {
+		ret = cmd_advance_default(s);
+		if (ret) {
+			gvt_vgpu_err("%s IP advance error\n", info->name);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+static inline bool gma_out_of_range(unsigned long gma,
+		unsigned long gma_head, unsigned int gma_tail)
+{
+	if (gma_tail >= gma_head)
+		return (gma < gma_head) || (gma > gma_tail);
+	else
+		return (gma > gma_tail) && (gma < gma_head);
+}
+
+/* Keep the consistent return type, e.g EBADRQC for unknown
+ * cmd, EFAULT for invalid address, EPERM for nonpriv. later
+ * works as the input of VM healthy status.
+ */
+static int command_scan(struct parser_exec_state *s,
+		unsigned long rb_head, unsigned long rb_tail,
+		unsigned long rb_start, unsigned long rb_len)
+{
+
+	unsigned long gma_head, gma_tail, gma_bottom;
+	int ret = 0;
+	struct intel_vgpu *vgpu = s->vgpu;
+
+	gma_head = rb_start + rb_head;
+	gma_tail = rb_start + rb_tail;
+	gma_bottom = rb_start +  rb_len;
+
+	while (s->ip_gma != gma_tail) {
+		if (s->buf_type == RING_BUFFER_INSTRUCTION ||
+				s->buf_type == RING_BUFFER_CTX) {
+			if (!(s->ip_gma >= rb_start) ||
+				!(s->ip_gma < gma_bottom)) {
+				gvt_vgpu_err("ip_gma %lx out of ring scope."
+					"(base:0x%lx, bottom: 0x%lx)\n",
+					s->ip_gma, rb_start,
+					gma_bottom);
+				parser_exec_state_dump(s);
+				return -EFAULT;
+			}
+			if (gma_out_of_range(s->ip_gma, gma_head, gma_tail)) {
+				gvt_vgpu_err("ip_gma %lx out of range."
+					"base 0x%lx head 0x%lx tail 0x%lx\n",
+					s->ip_gma, rb_start,
+					rb_head, rb_tail);
+				parser_exec_state_dump(s);
+				break;
+			}
+		}
+		ret = cmd_parser_exec(s);
+		if (ret) {
+			gvt_vgpu_err("cmd parser error\n");
+			parser_exec_state_dump(s);
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static int scan_workload(struct intel_vgpu_workload *workload)
+{
+	unsigned long gma_head, gma_tail;
+	struct parser_exec_state s;
+	int ret = 0;
+
+	/* ring base is page aligned */
+	if (WARN_ON(!IS_ALIGNED(workload->rb_start, I915_GTT_PAGE_SIZE)))
+		return -EINVAL;
+
+	gma_head = workload->rb_start + workload->rb_head;
+	gma_tail = workload->rb_start + workload->rb_tail;
+
+	s.buf_type = RING_BUFFER_INSTRUCTION;
+	s.buf_addr_type = GTT_BUFFER;
+	s.vgpu = workload->vgpu;
+	s.engine = workload->engine;
+	s.ring_start = workload->rb_start;
+	s.ring_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
+	s.ring_head = gma_head;
+	s.ring_tail = gma_tail;
+	s.rb_va = workload->shadow_ring_buffer_va;
+	s.workload = workload;
+	s.is_ctx_wa = false;
+
+	if (bypass_scan_mask & workload->engine->mask || gma_head == gma_tail)
+		return 0;
+
+	ret = ip_gma_set(&s, gma_head);
+	if (ret)
+		goto out;
+
+	ret = command_scan(&s, workload->rb_head, workload->rb_tail,
+		workload->rb_start, _RING_CTL_BUF_SIZE(workload->rb_ctl));
+
+out:
+	return ret;
+}
+
+static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+{
+
+	unsigned long gma_head, gma_tail, ring_size, ring_tail;
+	struct parser_exec_state s;
+	int ret = 0;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+				struct intel_vgpu_workload,
+				wa_ctx);
+
+	/* ring base is page aligned */
+	if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma,
+					I915_GTT_PAGE_SIZE)))
+		return -EINVAL;
+
+	ring_tail = wa_ctx->indirect_ctx.size + 3 * sizeof(u32);
+	ring_size = round_up(wa_ctx->indirect_ctx.size + CACHELINE_BYTES,
+			PAGE_SIZE);
+	gma_head = wa_ctx->indirect_ctx.guest_gma;
+	gma_tail = wa_ctx->indirect_ctx.guest_gma + ring_tail;
+
+	s.buf_type = RING_BUFFER_INSTRUCTION;
+	s.buf_addr_type = GTT_BUFFER;
+	s.vgpu = workload->vgpu;
+	s.engine = workload->engine;
+	s.ring_start = wa_ctx->indirect_ctx.guest_gma;
+	s.ring_size = ring_size;
+	s.ring_head = gma_head;
+	s.ring_tail = gma_tail;
+	s.rb_va = wa_ctx->indirect_ctx.shadow_va;
+	s.workload = workload;
+	s.is_ctx_wa = true;
+
+	ret = ip_gma_set(&s, gma_head);
+	if (ret)
+		goto out;
+
+	ret = command_scan(&s, 0, ring_tail,
+		wa_ctx->indirect_ctx.guest_gma, ring_size);
+out:
+	return ret;
+}
+
+static int shadow_workload_ring_buffer(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	unsigned long gma_head, gma_tail, gma_top, guest_rb_size;
+	void *shadow_ring_buffer_va;
+	int ret;
+
+	guest_rb_size = _RING_CTL_BUF_SIZE(workload->rb_ctl);
+
+	/* calculate workload ring buffer size */
+	workload->rb_len = (workload->rb_tail + guest_rb_size -
+			workload->rb_head) % guest_rb_size;
+
+	gma_head = workload->rb_start + workload->rb_head;
+	gma_tail = workload->rb_start + workload->rb_tail;
+	gma_top = workload->rb_start + guest_rb_size;
+
+	if (workload->rb_len > s->ring_scan_buffer_size[workload->engine->id]) {
+		void *p;
+
+		/* realloc the new ring buffer if needed */
+		p = krealloc(s->ring_scan_buffer[workload->engine->id],
+			     workload->rb_len, GFP_KERNEL);
+		if (!p) {
+			gvt_vgpu_err("fail to re-alloc ring scan buffer\n");
+			return -ENOMEM;
+		}
+		s->ring_scan_buffer[workload->engine->id] = p;
+		s->ring_scan_buffer_size[workload->engine->id] = workload->rb_len;
+	}
+
+	shadow_ring_buffer_va = s->ring_scan_buffer[workload->engine->id];
+
+	/* get shadow ring buffer va */
+	workload->shadow_ring_buffer_va = shadow_ring_buffer_va;
+
+	/* head > tail --> copy head <-> top */
+	if (gma_head > gma_tail) {
+		ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm,
+				      gma_head, gma_top, shadow_ring_buffer_va);
+		if (ret < 0) {
+			gvt_vgpu_err("fail to copy guest ring buffer\n");
+			return ret;
+		}
+		shadow_ring_buffer_va += ret;
+		gma_head = workload->rb_start;
+	}
+
+	/* copy head or start <-> tail */
+	ret = copy_gma_to_hva(vgpu, vgpu->gtt.ggtt_mm, gma_head, gma_tail,
+				shadow_ring_buffer_va);
+	if (ret < 0) {
+		gvt_vgpu_err("fail to copy guest ring buffer\n");
+		return ret;
+	}
+	return 0;
+}
+
+int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload)
+{
+	int ret;
+	struct intel_vgpu *vgpu = workload->vgpu;
+
+	ret = shadow_workload_ring_buffer(workload);
+	if (ret) {
+		gvt_vgpu_err("fail to shadow workload ring_buffer\n");
+		return ret;
+	}
+
+	ret = scan_workload(workload);
+	if (ret) {
+		gvt_vgpu_err("scan workload error\n");
+		return ret;
+	}
+	return 0;
+}
+
+static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+{
+	int ctx_size = wa_ctx->indirect_ctx.size;
+	unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+					struct intel_vgpu_workload,
+					wa_ctx);
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct drm_i915_gem_object *obj;
+	int ret = 0;
+	void *map;
+
+	obj = i915_gem_object_create_shmem(workload->engine->i915,
+					   roundup(ctx_size + CACHELINE_BYTES,
+						   PAGE_SIZE));
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	/* get the va of the shadow batch buffer */
+	map = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(map)) {
+		gvt_vgpu_err("failed to vmap shadow indirect ctx\n");
+		ret = PTR_ERR(map);
+		goto put_obj;
+	}
+
+	i915_gem_object_lock(obj, NULL);
+	ret = i915_gem_object_set_to_cpu_domain(obj, false);
+	i915_gem_object_unlock(obj);
+	if (ret) {
+		gvt_vgpu_err("failed to set shadow indirect ctx to CPU\n");
+		goto unmap_src;
+	}
+
+	ret = copy_gma_to_hva(workload->vgpu,
+				workload->vgpu->gtt.ggtt_mm,
+				guest_gma, guest_gma + ctx_size,
+				map);
+	if (ret < 0) {
+		gvt_vgpu_err("fail to copy guest indirect ctx\n");
+		goto unmap_src;
+	}
+
+	wa_ctx->indirect_ctx.obj = obj;
+	wa_ctx->indirect_ctx.shadow_va = map;
+	return 0;
+
+unmap_src:
+	i915_gem_object_unpin_map(obj);
+put_obj:
+	i915_gem_object_put(obj);
+	return ret;
+}
+
+static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+{
+	u32 per_ctx_start[CACHELINE_DWORDS] = {0};
+	unsigned char *bb_start_sva;
+
+	if (!wa_ctx->per_ctx.valid)
+		return 0;
+
+	per_ctx_start[0] = 0x18800001;
+	per_ctx_start[1] = wa_ctx->per_ctx.guest_gma;
+
+	bb_start_sva = (unsigned char *)wa_ctx->indirect_ctx.shadow_va +
+				wa_ctx->indirect_ctx.size;
+
+	memcpy(bb_start_sva, per_ctx_start, CACHELINE_BYTES);
+
+	return 0;
+}
+
+int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+{
+	int ret;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+					struct intel_vgpu_workload,
+					wa_ctx);
+	struct intel_vgpu *vgpu = workload->vgpu;
+
+	if (wa_ctx->indirect_ctx.size == 0)
+		return 0;
+
+	ret = shadow_indirect_ctx(wa_ctx);
+	if (ret) {
+		gvt_vgpu_err("fail to shadow indirect ctx\n");
+		return ret;
+	}
+
+	combine_wa_ctx(wa_ctx);
+
+	ret = scan_wa_ctx(wa_ctx);
+	if (ret) {
+		gvt_vgpu_err("scan wa ctx error\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+/* generate dummy contexts by sending empty requests to HW, and let
+ * the HW to fill Engine Contexts. This dummy contexts are used for
+ * initialization purpose (update reg whitelist), so referred to as
+ * init context here
+ */
+void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
+{
+	const unsigned long start = LRC_STATE_PN * PAGE_SIZE;
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (gvt->is_reg_whitelist_updated)
+		return;
+
+	/* scan init ctx to update cmd accessible list */
+	for_each_engine(engine, gvt->gt, id) {
+		struct parser_exec_state s;
+		void *vaddr;
+		int ret;
+
+		if (!engine->default_state)
+			continue;
+
+		vaddr = shmem_pin_map(engine->default_state);
+		if (!vaddr) {
+			gvt_err("failed to map %s->default state\n",
+				engine->name);
+			return;
+		}
+
+		s.buf_type = RING_BUFFER_CTX;
+		s.buf_addr_type = GTT_BUFFER;
+		s.vgpu = vgpu;
+		s.engine = engine;
+		s.ring_start = 0;
+		s.ring_size = engine->context_size - start;
+		s.ring_head = 0;
+		s.ring_tail = s.ring_size;
+		s.rb_va = vaddr + start;
+		s.workload = NULL;
+		s.is_ctx_wa = false;
+		s.is_init_ctx = true;
+
+		/* skipping the first RING_CTX_SIZE(0x50) dwords */
+		ret = ip_gma_set(&s, RING_CTX_SIZE);
+		if (ret == 0) {
+			ret = command_scan(&s, 0, s.ring_size, 0, s.ring_size);
+			if (ret)
+				gvt_err("Scan init ctx error\n");
+		}
+
+		shmem_unpin_map(engine->default_state, vaddr);
+		if (ret)
+			return;
+	}
+
+	gvt->is_reg_whitelist_updated = true;
+}
+
+int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	unsigned long gma_head, gma_tail, gma_start, ctx_size;
+	struct parser_exec_state s;
+	int ring_id = workload->engine->id;
+	struct intel_context *ce = vgpu->submission.shadow[ring_id];
+	int ret;
+
+	GEM_BUG_ON(atomic_read(&ce->pin_count) < 0);
+
+	ctx_size = workload->engine->context_size - PAGE_SIZE;
+
+	/* Only ring contxt is loaded to HW for inhibit context, no need to
+	 * scan engine context
+	 */
+	if (is_inhibit_context(ce))
+		return 0;
+
+	gma_start = i915_ggtt_offset(ce->state) + LRC_STATE_PN*PAGE_SIZE;
+	gma_head = 0;
+	gma_tail = ctx_size;
+
+	s.buf_type = RING_BUFFER_CTX;
+	s.buf_addr_type = GTT_BUFFER;
+	s.vgpu = workload->vgpu;
+	s.engine = workload->engine;
+	s.ring_start = gma_start;
+	s.ring_size = ctx_size;
+	s.ring_head = gma_start + gma_head;
+	s.ring_tail = gma_start + gma_tail;
+	s.rb_va = ce->lrc_reg_state;
+	s.workload = workload;
+	s.is_ctx_wa = false;
+	s.is_init_ctx = false;
+
+	/* don't scan the first RING_CTX_SIZE(0x50) dwords, as it's ring
+	 * context
+	 */
+	ret = ip_gma_set(&s, gma_start + gma_head + RING_CTX_SIZE);
+	if (ret)
+		goto out;
+
+	ret = command_scan(&s, gma_head, gma_tail,
+		gma_start, ctx_size);
+out:
+	if (ret)
+		gvt_vgpu_err("scan shadow ctx error\n");
+
+	return ret;
+}
+
+static int init_cmd_table(struct intel_gvt *gvt)
+{
+	unsigned int gen_type = intel_gvt_get_device_type(gvt);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cmd_info); i++) {
+		struct cmd_entry *e;
+
+		if (!(cmd_info[i].devices & gen_type))
+			continue;
+
+		e = kzalloc(sizeof(*e), GFP_KERNEL);
+		if (!e)
+			return -ENOMEM;
+
+		e->info = &cmd_info[i];
+		if (cmd_info[i].opcode == OP_MI_NOOP)
+			mi_noop_index = i;
+
+		INIT_HLIST_NODE(&e->hlist);
+		add_cmd_entry(gvt, e);
+		gvt_dbg_cmd("add %-30s op %04x flag %x devs %02x rings %02x\n",
+			    e->info->name, e->info->opcode, e->info->flag,
+			    e->info->devices, e->info->rings);
+	}
+
+	return 0;
+}
+
+static void clean_cmd_table(struct intel_gvt *gvt)
+{
+	struct hlist_node *tmp;
+	struct cmd_entry *e;
+	int i;
+
+	hash_for_each_safe(gvt->cmd_table, i, tmp, e, hlist)
+		kfree(e);
+
+	hash_init(gvt->cmd_table);
+}
+
+void intel_gvt_clean_cmd_parser(struct intel_gvt *gvt)
+{
+	clean_cmd_table(gvt);
+}
+
+int intel_gvt_init_cmd_parser(struct intel_gvt *gvt)
+{
+	int ret;
+
+	ret = init_cmd_table(gvt);
+	if (ret) {
+		intel_gvt_clean_cmd_parser(gvt);
+		return ret;
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.h b/drivers/gpu/drm/i915/gvt/cmd_parser.h
new file mode 100644
index 0000000000..416d345e28
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Ke Yu
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Yulei Zhang <yulei.zhang@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+#ifndef _GVT_CMD_PARSER_H_
+#define _GVT_CMD_PARSER_H_
+
+#define GVT_CMD_HASH_BITS 7
+
+struct intel_gvt;
+struct intel_shadow_wa_ctx;
+struct intel_vgpu;
+struct intel_vgpu_workload;
+
+void intel_gvt_clean_cmd_parser(struct intel_gvt *gvt);
+
+int intel_gvt_init_cmd_parser(struct intel_gvt *gvt);
+
+int intel_gvt_scan_and_shadow_ringbuffer(struct intel_vgpu_workload *workload);
+
+int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx);
+
+void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu);
+
+int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/debug.h b/drivers/gpu/drm/i915/gvt/debug.h
new file mode 100644
index 0000000000..c6027125c1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/debug.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __GVT_DEBUG_H__
+#define __GVT_DEBUG_H__
+
+#define gvt_err(fmt, args...) \
+	pr_err("gvt: "fmt, ##args)
+
+#define gvt_vgpu_err(fmt, args...)					\
+do {									\
+	if (IS_ERR_OR_NULL(vgpu))					\
+		pr_err("gvt: "fmt, ##args);			\
+	else								\
+		pr_err("gvt: vgpu %d: "fmt, vgpu->id, ##args);\
+} while (0)
+
+#define gvt_dbg_core(fmt, args...) \
+	pr_debug("gvt: core: "fmt, ##args)
+
+#define gvt_dbg_irq(fmt, args...) \
+	pr_debug("gvt: irq: "fmt, ##args)
+
+#define gvt_dbg_mm(fmt, args...) \
+	pr_debug("gvt: mm: "fmt, ##args)
+
+#define gvt_dbg_mmio(fmt, args...) \
+	pr_debug("gvt: mmio: "fmt, ##args)
+
+#define gvt_dbg_dpy(fmt, args...) \
+	pr_debug("gvt: dpy: "fmt, ##args)
+
+#define gvt_dbg_el(fmt, args...) \
+	pr_debug("gvt: el: "fmt, ##args)
+
+#define gvt_dbg_sched(fmt, args...) \
+	pr_debug("gvt: sched: "fmt, ##args)
+
+#define gvt_dbg_render(fmt, args...) \
+	pr_debug("gvt: render: "fmt, ##args)
+
+#define gvt_dbg_cmd(fmt, args...) \
+	pr_debug("gvt: cmd: "fmt, ##args)
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/debugfs.c b/drivers/gpu/drm/i915/gvt/debugfs.c
new file mode 100644
index 0000000000..baccbf1761
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/debugfs.c
@@ -0,0 +1,230 @@
+/*
+ * Copyright(c) 2011-2017 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/debugfs.h>
+#include <linux/list_sort.h>
+#include "i915_drv.h"
+#include "gvt.h"
+
+struct mmio_diff_param {
+	struct intel_vgpu *vgpu;
+	int total;
+	int diff;
+	struct list_head diff_mmio_list;
+};
+
+struct diff_mmio {
+	struct list_head node;
+	u32 offset;
+	u32 preg;
+	u32 vreg;
+};
+
+/* Compare two diff_mmio items. */
+static int mmio_offset_compare(void *priv,
+	const struct list_head *a, const struct list_head *b)
+{
+	struct diff_mmio *ma;
+	struct diff_mmio *mb;
+
+	ma = container_of(a, struct diff_mmio, node);
+	mb = container_of(b, struct diff_mmio, node);
+	if (ma->offset < mb->offset)
+		return -1;
+	else if (ma->offset > mb->offset)
+		return 1;
+	return 0;
+}
+
+static inline int mmio_diff_handler(struct intel_gvt *gvt,
+				    u32 offset, void *data)
+{
+	struct mmio_diff_param *param = data;
+	struct diff_mmio *node;
+	u32 preg, vreg;
+
+	preg = intel_uncore_read_notrace(gvt->gt->uncore, _MMIO(offset));
+	vreg = vgpu_vreg(param->vgpu, offset);
+
+	if (preg != vreg) {
+		node = kmalloc(sizeof(*node), GFP_ATOMIC);
+		if (!node)
+			return -ENOMEM;
+
+		node->offset = offset;
+		node->preg = preg;
+		node->vreg = vreg;
+		list_add(&node->node, &param->diff_mmio_list);
+		param->diff++;
+	}
+	param->total++;
+	return 0;
+}
+
+/* Show the all the different values of tracked mmio. */
+static int vgpu_mmio_diff_show(struct seq_file *s, void *unused)
+{
+	struct intel_vgpu *vgpu = s->private;
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct mmio_diff_param param = {
+		.vgpu = vgpu,
+		.total = 0,
+		.diff = 0,
+	};
+	struct diff_mmio *node, *next;
+
+	INIT_LIST_HEAD(&param.diff_mmio_list);
+
+	mutex_lock(&gvt->lock);
+	spin_lock_bh(&gvt->scheduler.mmio_context_lock);
+
+	mmio_hw_access_pre(gvt->gt);
+	/* Recognize all the diff mmios to list. */
+	intel_gvt_for_each_tracked_mmio(gvt, mmio_diff_handler, &param);
+	mmio_hw_access_post(gvt->gt);
+
+	spin_unlock_bh(&gvt->scheduler.mmio_context_lock);
+	mutex_unlock(&gvt->lock);
+
+	/* In an ascending order by mmio offset. */
+	list_sort(NULL, &param.diff_mmio_list, mmio_offset_compare);
+
+	seq_printf(s, "%-8s %-8s %-8s %-8s\n", "Offset", "HW", "vGPU", "Diff");
+	list_for_each_entry_safe(node, next, &param.diff_mmio_list, node) {
+		u32 diff = node->preg ^ node->vreg;
+
+		seq_printf(s, "%08x %08x %08x %*pbl\n",
+			   node->offset, node->preg, node->vreg,
+			   32, &diff);
+		list_del(&node->node);
+		kfree(node);
+	}
+	seq_printf(s, "Total: %d, Diff: %d\n", param.total, param.diff);
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(vgpu_mmio_diff);
+
+static int
+vgpu_scan_nonprivbb_get(void *data, u64 *val)
+{
+	struct intel_vgpu *vgpu = (struct intel_vgpu *)data;
+
+	*val = vgpu->scan_nonprivbb;
+	return 0;
+}
+
+/*
+ * set/unset bit engine_id of vgpu->scan_nonprivbb to turn on/off scanning
+ * of non-privileged batch buffer. e.g.
+ * if vgpu->scan_nonprivbb=3, then it will scan non-privileged batch buffer
+ * on engine 0 and 1.
+ */
+static int
+vgpu_scan_nonprivbb_set(void *data, u64 val)
+{
+	struct intel_vgpu *vgpu = (struct intel_vgpu *)data;
+
+	vgpu->scan_nonprivbb = val;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(vgpu_scan_nonprivbb_fops,
+			 vgpu_scan_nonprivbb_get, vgpu_scan_nonprivbb_set,
+			 "0x%llx\n");
+
+static int vgpu_status_get(void *data, u64 *val)
+{
+	struct intel_vgpu *vgpu = (struct intel_vgpu *)data;
+
+	*val = 0;
+
+	if (test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+		*val |= (1 << INTEL_VGPU_STATUS_ATTACHED);
+	if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
+		*val |= (1 << INTEL_VGPU_STATUS_ACTIVE);
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(vgpu_status_fops, vgpu_status_get, NULL, "0x%llx\n");
+
+/**
+ * intel_gvt_debugfs_add_vgpu - register debugfs entries for a vGPU
+ * @vgpu: a vGPU
+ */
+void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu)
+{
+	char name[16] = "";
+
+	snprintf(name, 16, "vgpu%d", vgpu->id);
+	vgpu->debugfs = debugfs_create_dir(name, vgpu->gvt->debugfs_root);
+
+	debugfs_create_file("mmio_diff", 0444, vgpu->debugfs, vgpu,
+			    &vgpu_mmio_diff_fops);
+	debugfs_create_file_unsafe("scan_nonprivbb", 0644, vgpu->debugfs, vgpu,
+				   &vgpu_scan_nonprivbb_fops);
+	debugfs_create_file_unsafe("status", 0644, vgpu->debugfs, vgpu,
+				   &vgpu_status_fops);
+}
+
+/**
+ * intel_gvt_debugfs_remove_vgpu - remove debugfs entries of a vGPU
+ * @vgpu: a vGPU
+ */
+void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_minor *minor = gvt->gt->i915->drm.primary;
+
+	if (minor->debugfs_root && gvt->debugfs_root) {
+		debugfs_remove_recursive(vgpu->debugfs);
+		vgpu->debugfs = NULL;
+	}
+}
+
+/**
+ * intel_gvt_debugfs_init - register gvt debugfs root entry
+ * @gvt: GVT device
+ */
+void intel_gvt_debugfs_init(struct intel_gvt *gvt)
+{
+	struct drm_minor *minor = gvt->gt->i915->drm.primary;
+
+	gvt->debugfs_root = debugfs_create_dir("gvt", minor->debugfs_root);
+
+	debugfs_create_ulong("num_tracked_mmio", 0444, gvt->debugfs_root,
+			     &gvt->mmio.num_tracked_mmio);
+}
+
+/**
+ * intel_gvt_debugfs_clean - remove debugfs entries
+ * @gvt: GVT device
+ */
+void intel_gvt_debugfs_clean(struct intel_gvt *gvt)
+{
+	struct drm_minor *minor = gvt->gt->i915->drm.primary;
+
+	if (minor->debugfs_root) {
+		debugfs_remove_recursive(gvt->debugfs_root);
+		gvt->debugfs_root = NULL;
+	}
+}
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
new file mode 100644
index 0000000000..e0c5dfb788
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Ke Yu
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Terrence Xu <terrence.xu@intel.com>
+ *    Changbin Du <changbin.du@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gvt.h"
+
+#include "display/intel_display.h"
+#include "display/intel_dpio_phy.h"
+
+static int get_edp_pipe(struct intel_vgpu *vgpu)
+{
+	u32 data = vgpu_vreg(vgpu, _TRANS_DDI_FUNC_CTL_EDP);
+	int pipe = -1;
+
+	switch (data & TRANS_DDI_EDP_INPUT_MASK) {
+	case TRANS_DDI_EDP_INPUT_A_ON:
+	case TRANS_DDI_EDP_INPUT_A_ONOFF:
+		pipe = PIPE_A;
+		break;
+	case TRANS_DDI_EDP_INPUT_B_ONOFF:
+		pipe = PIPE_B;
+		break;
+	case TRANS_DDI_EDP_INPUT_C_ONOFF:
+		pipe = PIPE_C;
+		break;
+	}
+	return pipe;
+}
+
+static int edp_pipe_is_enabled(struct intel_vgpu *vgpu)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+
+	if (!(vgpu_vreg_t(vgpu, TRANSCONF(TRANSCODER_EDP)) & TRANSCONF_ENABLE))
+		return 0;
+
+	if (!(vgpu_vreg(vgpu, _TRANS_DDI_FUNC_CTL_EDP) & TRANS_DDI_FUNC_ENABLE))
+		return 0;
+	return 1;
+}
+
+int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+
+	if (drm_WARN_ON(&dev_priv->drm,
+			pipe < PIPE_A || pipe >= I915_MAX_PIPES))
+		return -EINVAL;
+
+	if (vgpu_vreg_t(vgpu, TRANSCONF(pipe)) & TRANSCONF_ENABLE)
+		return 1;
+
+	if (edp_pipe_is_enabled(vgpu) &&
+			get_edp_pipe(vgpu) == pipe)
+		return 1;
+	return 0;
+}
+
+static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = {
+	{
+/* EDID with 1024x768 as its resolution */
+		/*Header*/
+		0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+		/* Vendor & Product Identification */
+		0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17,
+		/* Version & Revision */
+		0x01, 0x04,
+		/* Basic Display Parameters & Features */
+		0xa5, 0x34, 0x20, 0x78, 0x23,
+		/* Color Characteristics */
+		0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
+		/* Established Timings: maximum resolution is 1024x768 */
+		0x21, 0x08, 0x00,
+		/* Standard Timings. All invalid */
+		0x00, 0xc0, 0x00, 0xc0, 0x00, 0x40, 0x00, 0x80, 0x00, 0x00,
+		0x00, 0x40, 0x00, 0x00, 0x00, 0x01,
+		/* 18 Byte Data Blocks 1: invalid */
+		0x00, 0x00, 0x80, 0xa0, 0x70, 0xb0,
+		0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
+		/* 18 Byte Data Blocks 2: invalid */
+		0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
+		0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+		/* 18 Byte Data Blocks 3: invalid */
+		0x00, 0x00, 0x00, 0xfc, 0x00, 0x48,
+		0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20,
+		/* 18 Byte Data Blocks 4: invalid */
+		0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30,
+		0x44, 0x58, 0x51, 0x0a, 0x20, 0x20,
+		/* Extension Block Count */
+		0x00,
+		/* Checksum */
+		0xef,
+	},
+	{
+/* EDID with 1920x1200 as its resolution */
+		/*Header*/
+		0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00,
+		/* Vendor & Product Identification */
+		0x22, 0xf0, 0x54, 0x29, 0x00, 0x00, 0x00, 0x00, 0x04, 0x17,
+		/* Version & Revision */
+		0x01, 0x04,
+		/* Basic Display Parameters & Features */
+		0xa5, 0x34, 0x20, 0x78, 0x23,
+		/* Color Characteristics */
+		0xfc, 0x81, 0xa4, 0x55, 0x4d, 0x9d, 0x25, 0x12, 0x50, 0x54,
+		/* Established Timings: maximum resolution is 1024x768 */
+		0x21, 0x08, 0x00,
+		/*
+		 * Standard Timings.
+		 * below new resolutions can be supported:
+		 * 1920x1080, 1280x720, 1280x960, 1280x1024,
+		 * 1440x900, 1600x1200, 1680x1050
+		 */
+		0xd1, 0xc0, 0x81, 0xc0, 0x81, 0x40, 0x81, 0x80, 0x95, 0x00,
+		0xa9, 0x40, 0xb3, 0x00, 0x01, 0x01,
+		/* 18 Byte Data Blocks 1: max resolution is 1920x1200 */
+		0x28, 0x3c, 0x80, 0xa0, 0x70, 0xb0,
+		0x23, 0x40, 0x30, 0x20, 0x36, 0x00, 0x06, 0x44, 0x21, 0x00, 0x00, 0x1a,
+		/* 18 Byte Data Blocks 2: invalid */
+		0x00, 0x00, 0x00, 0xfd, 0x00, 0x18, 0x3c, 0x18, 0x50, 0x11, 0x00, 0x0a,
+		0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
+		/* 18 Byte Data Blocks 3: invalid */
+		0x00, 0x00, 0x00, 0xfc, 0x00, 0x48,
+		0x50, 0x20, 0x5a, 0x52, 0x32, 0x34, 0x34, 0x30, 0x77, 0x0a, 0x20, 0x20,
+		/* 18 Byte Data Blocks 4: invalid */
+		0x00, 0x00, 0x00, 0xff, 0x00, 0x43, 0x4e, 0x34, 0x33, 0x30, 0x34, 0x30,
+		0x44, 0x58, 0x51, 0x0a, 0x20, 0x20,
+		/* Extension Block Count */
+		0x00,
+		/* Checksum */
+		0x45,
+	},
+};
+
+#define DPCD_HEADER_SIZE        0xb
+
+/* let the virtual display supports DP1.2 */
+static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = {
+	0x12, 0x014, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	int pipe;
+
+	if (IS_BROXTON(dev_priv)) {
+		enum transcoder trans;
+		enum port port;
+
+		/* Clear PIPE, DDI, PHY, HPD before setting new */
+		vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
+			~(GEN8_DE_PORT_HOTPLUG(HPD_PORT_A) |
+			  GEN8_DE_PORT_HOTPLUG(HPD_PORT_B) |
+			  GEN8_DE_PORT_HOTPLUG(HPD_PORT_C));
+
+		for_each_pipe(dev_priv, pipe) {
+			vgpu_vreg_t(vgpu, TRANSCONF(pipe)) &=
+				~(TRANSCONF_ENABLE | TRANSCONF_STATE_ENABLE);
+			vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISP_ENABLE;
+			vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE;
+			vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~MCURSOR_MODE_MASK;
+			vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= MCURSOR_MODE_DISABLE;
+		}
+
+		for (trans = TRANSCODER_A; trans <= TRANSCODER_EDP; trans++) {
+			vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(trans)) &=
+				~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+				  TRANS_DDI_PORT_MASK | TRANS_DDI_FUNC_ENABLE);
+		}
+		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			  TRANS_DDI_PORT_MASK);
+
+		for (port = PORT_A; port <= PORT_C; port++) {
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(port)) &=
+				~BXT_PHY_LANE_ENABLED;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(port)) |=
+				(BXT_PHY_CMNLANE_POWERDOWN_ACK |
+				 BXT_PHY_LANE_POWERDOWN_ACK);
+
+			vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(port)) &=
+				~(PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
+				  PORT_PLL_REF_SEL | PORT_PLL_LOCK |
+				  PORT_PLL_ENABLE);
+
+			vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) &=
+				~(DDI_INIT_DISPLAY_DETECTED |
+				  DDI_BUF_CTL_ENABLE);
+			vgpu_vreg_t(vgpu, DDI_BUF_CTL(port)) |= DDI_BUF_IS_IDLE;
+		}
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+			~(PORTA_HOTPLUG_ENABLE | PORTA_HOTPLUG_STATUS_MASK);
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+			~(PORTB_HOTPLUG_ENABLE | PORTB_HOTPLUG_STATUS_MASK);
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+			~(PORTC_HOTPLUG_ENABLE | PORTC_HOTPLUG_STATUS_MASK);
+		/* No hpd_invert set in vgpu vbt, need to clear invert mask */
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &= ~BXT_DDI_HPD_INVERT_MASK;
+		vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &= ~BXT_DE_PORT_HOTPLUG_MASK;
+
+		vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &= ~(BIT(0) | BIT(1));
+		vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &=
+			~PHY_POWER_GOOD;
+		vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) &=
+			~PHY_POWER_GOOD;
+		vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) &= ~BIT(30);
+		vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY1)) &= ~BIT(30);
+
+		vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIB_DETECTED;
+		vgpu_vreg_t(vgpu, SFUSE_STRAP) &= ~SFUSE_STRAP_DDIC_DETECTED;
+
+		/*
+		 * Only 1 PIPE enabled in current vGPU display and PIPE_A is
+		 *  tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
+		 *   TRANSCODER_A can be enabled. PORT_x depends on the input of
+		 *   setup_virtual_dp_monitor.
+		 */
+		vgpu_vreg_t(vgpu, TRANSCONF(TRANSCODER_A)) |= TRANSCONF_ENABLE;
+		vgpu_vreg_t(vgpu, TRANSCONF(TRANSCODER_A)) |= TRANSCONF_STATE_ENABLE;
+
+		/*
+		 * Golden M/N are calculated based on:
+		 *   24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
+		 *   DP link clk 1620 MHz and non-constant_n.
+		 * TODO: calculate DP link symbol clk and stream clk m/n.
+		 */
+		vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = TU_SIZE(64);
+		vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
+		vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
+		vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
+		vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
+
+		/* Enable per-DDI/PORT vreg */
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
+			vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(1);
+			vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) |=
+				PHY_POWER_GOOD;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY1)) |=
+				BIT(30);
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_A)) |=
+				BXT_PHY_LANE_ENABLED;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_A)) &=
+				~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
+				  BXT_PHY_LANE_POWERDOWN_ACK);
+			vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_A)) |=
+				(PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
+				 PORT_PLL_REF_SEL | PORT_PLL_LOCK |
+				 PORT_PLL_ENABLE);
+			vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) |=
+				(DDI_BUF_CTL_ENABLE | DDI_INIT_DISPLAY_DETECTED);
+			vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) &=
+				~DDI_BUF_IS_IDLE;
+			vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_EDP)) |=
+				(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+				 TRANS_DDI_FUNC_ENABLE);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTA_HOTPLUG_ENABLE;
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
+		}
+
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+			vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
+			vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(0);
+			vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) |=
+				PHY_POWER_GOOD;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) |=
+				BIT(30);
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_B)) |=
+				BXT_PHY_LANE_ENABLED;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_B)) &=
+				~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
+				  BXT_PHY_LANE_POWERDOWN_ACK);
+			vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_B)) |=
+				(PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
+				 PORT_PLL_REF_SEL | PORT_PLL_LOCK |
+				 PORT_PLL_ENABLE);
+			vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) |=
+				DDI_BUF_CTL_ENABLE;
+			vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &=
+				~DDI_BUF_IS_IDLE;
+			vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+				(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+				 (PORT_B << TRANS_DDI_PORT_SHIFT) |
+				 TRANS_DDI_FUNC_ENABLE);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTB_HOTPLUG_ENABLE;
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
+		}
+
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+			vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
+			vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) |= BIT(0);
+			vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) |=
+				PHY_POWER_GOOD;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) |=
+				BIT(30);
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) |=
+				BXT_PHY_LANE_ENABLED;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) &=
+				~(BXT_PHY_CMNLANE_POWERDOWN_ACK |
+				  BXT_PHY_LANE_POWERDOWN_ACK);
+			vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(PORT_C)) |=
+				(PORT_PLL_POWER_STATE | PORT_PLL_POWER_ENABLE |
+				 PORT_PLL_REF_SEL | PORT_PLL_LOCK |
+				 PORT_PLL_ENABLE);
+			vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) |=
+				DDI_BUF_CTL_ENABLE;
+			vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &=
+				~DDI_BUF_IS_IDLE;
+			vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+				(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+				 (PORT_B << TRANS_DDI_PORT_SHIFT) |
+				 TRANS_DDI_FUNC_ENABLE);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTC_HOTPLUG_ENABLE;
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
+		}
+
+		return;
+	}
+
+	vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT |
+			SDE_PORTC_HOTPLUG_CPT |
+			SDE_PORTD_HOTPLUG_CPT);
+
+	if (IS_SKYLAKE(dev_priv) ||
+	    IS_KABYLAKE(dev_priv) ||
+	    IS_COFFEELAKE(dev_priv) ||
+	    IS_COMETLAKE(dev_priv)) {
+		vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT |
+				SDE_PORTE_HOTPLUG_SPT);
+		vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |=
+				SKL_FUSE_DOWNLOAD_STATUS |
+				SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
+				SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
+				SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
+		/*
+		 * Only 1 PIPE enabled in current vGPU display and PIPE_A is
+		 *  tied to TRANSCODER_A in HW, so it's safe to assume PIPE_A,
+		 *   TRANSCODER_A can be enabled. PORT_x depends on the input of
+		 *   setup_virtual_dp_monitor, we can bind DPLL0 to any PORT_x
+		 *   so we fixed to DPLL0 here.
+		 * Setup DPLL0: DP link clk 1620 MHz, non SSC, DP Mode
+		 */
+		vgpu_vreg_t(vgpu, DPLL_CTRL1) =
+			DPLL_CTRL1_OVERRIDE(DPLL_ID_SKL_DPLL0);
+		vgpu_vreg_t(vgpu, DPLL_CTRL1) |=
+			DPLL_CTRL1_LINK_RATE(DPLL_CTRL1_LINK_RATE_1620, DPLL_ID_SKL_DPLL0);
+		vgpu_vreg_t(vgpu, LCPLL1_CTL) =
+			LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK;
+		vgpu_vreg_t(vgpu, DPLL_STATUS) = DPLL_LOCK(DPLL_ID_SKL_DPLL0);
+		/*
+		 * Golden M/N are calculated based on:
+		 *   24 bpp, 4 lanes, 154000 pixel clk (from virtual EDID),
+		 *   DP link clk 1620 MHz and non-constant_n.
+		 * TODO: calculate DP link symbol clk and stream clk m/n.
+		 */
+		vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) = TU_SIZE(64);
+		vgpu_vreg_t(vgpu, PIPE_DATA_M1(TRANSCODER_A)) |= 0x5b425e;
+		vgpu_vreg_t(vgpu, PIPE_DATA_N1(TRANSCODER_A)) = 0x800000;
+		vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A)) = 0x3cd6e;
+		vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A)) = 0x80000;
+	}
+
+	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+			~DPLL_CTRL2_DDI_CLK_OFF(PORT_B);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_B);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_B);
+		vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
+		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_B << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
+		vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE;
+		vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
+	}
+
+	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+			~DPLL_CTRL2_DDI_CLK_OFF(PORT_C);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_C);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_C);
+		vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
+		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_C << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
+		vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE;
+		vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
+	}
+
+	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) &=
+			~DPLL_CTRL2_DDI_CLK_OFF(PORT_D);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_CLK_SEL(DPLL_ID_SKL_DPLL0, PORT_D);
+		vgpu_vreg_t(vgpu, DPLL_CTRL2) |=
+			DPLL_CTRL2_DDI_SEL_OVERRIDE(PORT_D);
+		vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_D << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		if (IS_BROADWELL(dev_priv)) {
+			vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) &=
+				~PORT_CLK_SEL_MASK;
+			vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) |=
+				PORT_CLK_SEL_LCPLL_810;
+		}
+		vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE;
+		vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
+	}
+
+	if ((IS_SKYLAKE(dev_priv) ||
+	     IS_KABYLAKE(dev_priv) ||
+	     IS_COFFEELAKE(dev_priv) ||
+	     IS_COMETLAKE(dev_priv)) &&
+			intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) {
+		vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT;
+	}
+
+	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
+		if (IS_BROADWELL(dev_priv))
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
+		else
+			vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT;
+
+		vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED;
+	}
+
+	/* Clear host CRT status, so guest couldn't detect this host CRT. */
+	if (IS_BROADWELL(dev_priv))
+		vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK;
+
+	/* Disable Primary/Sprite/Cursor plane */
+	for_each_pipe(dev_priv, pipe) {
+		vgpu_vreg_t(vgpu, DSPCNTR(pipe)) &= ~DISP_ENABLE;
+		vgpu_vreg_t(vgpu, SPRCTL(pipe)) &= ~SPRITE_ENABLE;
+		vgpu_vreg_t(vgpu, CURCNTR(pipe)) &= ~MCURSOR_MODE_MASK;
+		vgpu_vreg_t(vgpu, CURCNTR(pipe)) |= MCURSOR_MODE_DISABLE;
+	}
+
+	vgpu_vreg_t(vgpu, TRANSCONF(TRANSCODER_A)) |= TRANSCONF_ENABLE;
+}
+
+static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num)
+{
+	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
+
+	kfree(port->edid);
+	port->edid = NULL;
+
+	kfree(port->dpcd);
+	port->dpcd = NULL;
+}
+
+static enum hrtimer_restart vblank_timer_fn(struct hrtimer *data)
+{
+	struct intel_vgpu_vblank_timer *vblank_timer;
+	struct intel_vgpu *vgpu;
+
+	vblank_timer = container_of(data, struct intel_vgpu_vblank_timer, timer);
+	vgpu = container_of(vblank_timer, struct intel_vgpu, vblank_timer);
+
+	/* Set vblank emulation request per-vGPU bit */
+	intel_gvt_request_service(vgpu->gvt,
+				  INTEL_GVT_REQUEST_EMULATE_VBLANK + vgpu->id);
+	hrtimer_add_expires_ns(&vblank_timer->timer, vblank_timer->period);
+	return HRTIMER_RESTART;
+}
+
+static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num,
+				    int type, unsigned int resolution)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
+	struct intel_vgpu_vblank_timer *vblank_timer = &vgpu->vblank_timer;
+
+	if (drm_WARN_ON(&i915->drm, resolution >= GVT_EDID_NUM))
+		return -EINVAL;
+
+	port->edid = kzalloc(sizeof(*(port->edid)), GFP_KERNEL);
+	if (!port->edid)
+		return -ENOMEM;
+
+	port->dpcd = kzalloc(sizeof(*(port->dpcd)), GFP_KERNEL);
+	if (!port->dpcd) {
+		kfree(port->edid);
+		return -ENOMEM;
+	}
+
+	memcpy(port->edid->edid_block, virtual_dp_monitor_edid[resolution],
+			EDID_SIZE);
+	port->edid->data_valid = true;
+
+	memcpy(port->dpcd->data, dpcd_fix_data, DPCD_HEADER_SIZE);
+	port->dpcd->data_valid = true;
+	port->dpcd->data[DPCD_SINK_COUNT] = 0x1;
+	port->type = type;
+	port->id = resolution;
+	port->vrefresh_k = GVT_DEFAULT_REFRESH_RATE * MSEC_PER_SEC;
+	vgpu->display.port_num = port_num;
+
+	/* Init hrtimer based on default refresh rate */
+	hrtimer_init(&vblank_timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	vblank_timer->timer.function = vblank_timer_fn;
+	vblank_timer->vrefresh_k = port->vrefresh_k;
+	vblank_timer->period = DIV64_U64_ROUND_CLOSEST(NSEC_PER_SEC * MSEC_PER_SEC, vblank_timer->vrefresh_k);
+
+	emulate_monitor_status_change(vgpu);
+
+	return 0;
+}
+
+/**
+ * vgpu_update_vblank_emulation - Update per-vGPU vblank_timer
+ * @vgpu: vGPU operated
+ * @turnon: Turn ON/OFF vblank_timer
+ *
+ * This function is used to turn on/off or update the per-vGPU vblank_timer
+ * when TRANSCONF is enabled or disabled. vblank_timer period is also updated
+ * if guest changed the refresh rate.
+ *
+ */
+void vgpu_update_vblank_emulation(struct intel_vgpu *vgpu, bool turnon)
+{
+	struct intel_vgpu_vblank_timer *vblank_timer = &vgpu->vblank_timer;
+	struct intel_vgpu_port *port =
+		intel_vgpu_port(vgpu, vgpu->display.port_num);
+
+	if (turnon) {
+		/*
+		 * Skip the re-enable if already active and vrefresh unchanged.
+		 * Otherwise, stop timer if already active and restart with new
+		 *   period.
+		 */
+		if (vblank_timer->vrefresh_k != port->vrefresh_k ||
+		    !hrtimer_active(&vblank_timer->timer)) {
+			/* Stop timer before start with new period if active */
+			if (hrtimer_active(&vblank_timer->timer))
+				hrtimer_cancel(&vblank_timer->timer);
+
+			/* Make sure new refresh rate updated to timer period */
+			vblank_timer->vrefresh_k = port->vrefresh_k;
+			vblank_timer->period = DIV64_U64_ROUND_CLOSEST(NSEC_PER_SEC * MSEC_PER_SEC, vblank_timer->vrefresh_k);
+			hrtimer_start(&vblank_timer->timer,
+				      ktime_add_ns(ktime_get(), vblank_timer->period),
+				      HRTIMER_MODE_ABS);
+		}
+	} else {
+		/* Caller request to stop vblank */
+		hrtimer_cancel(&vblank_timer->timer);
+	}
+}
+
+static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	struct intel_vgpu_irq *irq = &vgpu->irq;
+	int vblank_event[] = {
+		[PIPE_A] = PIPE_A_VBLANK,
+		[PIPE_B] = PIPE_B_VBLANK,
+		[PIPE_C] = PIPE_C_VBLANK,
+	};
+	int event;
+
+	if (pipe < PIPE_A || pipe > PIPE_C)
+		return;
+
+	for_each_set_bit(event, irq->flip_done_event[pipe],
+			INTEL_GVT_EVENT_MAX) {
+		clear_bit(event, irq->flip_done_event[pipe]);
+		if (!pipe_is_enabled(vgpu, pipe))
+			continue;
+
+		intel_vgpu_trigger_virtual_event(vgpu, event);
+	}
+
+	if (pipe_is_enabled(vgpu, pipe)) {
+		vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(pipe))++;
+		intel_vgpu_trigger_virtual_event(vgpu, vblank_event[pipe]);
+	}
+}
+
+void intel_vgpu_emulate_vblank(struct intel_vgpu *vgpu)
+{
+	int pipe;
+
+	mutex_lock(&vgpu->vgpu_lock);
+	for_each_pipe(vgpu->gvt->gt->i915, pipe)
+		emulate_vblank_on_pipe(vgpu, pipe);
+	mutex_unlock(&vgpu->vgpu_lock);
+}
+
+/**
+ * intel_vgpu_emulate_hotplug - trigger hotplug event for vGPU
+ * @vgpu: a vGPU
+ * @connected: link state
+ *
+ * This function is used to trigger hotplug interrupt for vGPU
+ *
+ */
+void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+
+	/* TODO: add more platforms support */
+	if (IS_SKYLAKE(i915) ||
+	    IS_KABYLAKE(i915) ||
+	    IS_COFFEELAKE(i915) ||
+	    IS_COMETLAKE(i915)) {
+		if (connected) {
+			vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
+				SFUSE_STRAP_DDID_DETECTED;
+			vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+		} else {
+			vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
+				~SFUSE_STRAP_DDID_DETECTED;
+			vgpu_vreg_t(vgpu, SDEISR) &= ~SDE_PORTD_HOTPLUG_CPT;
+		}
+		vgpu_vreg_t(vgpu, SDEIIR) |= SDE_PORTD_HOTPLUG_CPT;
+		vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTD_HOTPLUG_STATUS_MASK;
+		intel_vgpu_trigger_virtual_event(vgpu, DP_D_HOTPLUG);
+	} else if (IS_BROXTON(i915)) {
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) {
+			if (connected) {
+				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+					GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
+			} else {
+				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
+					~GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
+			}
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_A);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+				~PORTA_HOTPLUG_STATUS_MASK;
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTA_HOTPLUG_LONG_DETECT;
+			intel_vgpu_trigger_virtual_event(vgpu, DP_A_HOTPLUG);
+		}
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
+			if (connected) {
+				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+					GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
+				vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
+					SFUSE_STRAP_DDIB_DETECTED;
+			} else {
+				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
+					~GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
+				vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
+					~SFUSE_STRAP_DDIB_DETECTED;
+			}
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_B);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+				~PORTB_HOTPLUG_STATUS_MASK;
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTB_HOTPLUG_LONG_DETECT;
+			intel_vgpu_trigger_virtual_event(vgpu, DP_B_HOTPLUG);
+		}
+		if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
+			if (connected) {
+				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |=
+					GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
+				vgpu_vreg_t(vgpu, SFUSE_STRAP) |=
+					SFUSE_STRAP_DDIC_DETECTED;
+			} else {
+				vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) &=
+					~GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
+				vgpu_vreg_t(vgpu, SFUSE_STRAP) &=
+					~SFUSE_STRAP_DDIC_DETECTED;
+			}
+			vgpu_vreg_t(vgpu, GEN8_DE_PORT_IIR) |=
+				GEN8_DE_PORT_HOTPLUG(HPD_PORT_C);
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) &=
+				~PORTC_HOTPLUG_STATUS_MASK;
+			vgpu_vreg_t(vgpu, PCH_PORT_HOTPLUG) |=
+				PORTC_HOTPLUG_LONG_DETECT;
+			intel_vgpu_trigger_virtual_event(vgpu, DP_C_HOTPLUG);
+		}
+	}
+}
+
+/**
+ * intel_vgpu_clean_display - clean vGPU virtual display emulation
+ * @vgpu: a vGPU
+ *
+ * This function is used to clean vGPU virtual display emulation stuffs
+ *
+ */
+void intel_vgpu_clean_display(struct intel_vgpu *vgpu)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+
+	if (IS_SKYLAKE(dev_priv) ||
+	    IS_KABYLAKE(dev_priv) ||
+	    IS_COFFEELAKE(dev_priv) ||
+	    IS_COMETLAKE(dev_priv))
+		clean_virtual_dp_monitor(vgpu, PORT_D);
+	else
+		clean_virtual_dp_monitor(vgpu, PORT_B);
+
+	vgpu_update_vblank_emulation(vgpu, false);
+}
+
+/**
+ * intel_vgpu_init_display- initialize vGPU virtual display emulation
+ * @vgpu: a vGPU
+ * @resolution: resolution index for intel_vgpu_edid
+ *
+ * This function is used to initialize vGPU virtual display emulation stuffs
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+
+	intel_vgpu_init_i2c_edid(vgpu);
+
+	if (IS_SKYLAKE(dev_priv) ||
+	    IS_KABYLAKE(dev_priv) ||
+	    IS_COFFEELAKE(dev_priv) ||
+	    IS_COMETLAKE(dev_priv))
+		return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D,
+						resolution);
+	else
+		return setup_virtual_dp_monitor(vgpu, PORT_B, GVT_DP_B,
+						resolution);
+}
+
+/**
+ * intel_vgpu_reset_display- reset vGPU virtual display emulation
+ * @vgpu: a vGPU
+ *
+ * This function is used to reset vGPU virtual display emulation stuffs
+ *
+ */
+void intel_vgpu_reset_display(struct intel_vgpu *vgpu)
+{
+	emulate_monitor_status_change(vgpu);
+}
diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h
new file mode 100644
index 0000000000..f5616f99ef
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/display.h
@@ -0,0 +1,224 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Ke Yu
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Terrence Xu <terrence.xu@intel.com>
+ *    Changbin Du <changbin.du@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#ifndef _GVT_DISPLAY_H_
+#define _GVT_DISPLAY_H_
+
+#include <linux/types.h>
+#include <linux/hrtimer.h>
+
+struct intel_gvt;
+struct intel_vgpu;
+
+#define SBI_REG_MAX	20
+#define DPCD_SIZE	0x700
+
+#define intel_vgpu_port(vgpu, port) \
+	(&(vgpu->display.ports[port]))
+
+#define intel_vgpu_has_monitor_on_port(vgpu, port) \
+	(intel_vgpu_port(vgpu, port)->edid && \
+		intel_vgpu_port(vgpu, port)->edid->data_valid)
+
+#define intel_vgpu_port_is_dp(vgpu, port) \
+	((intel_vgpu_port(vgpu, port)->type == GVT_DP_A) || \
+	(intel_vgpu_port(vgpu, port)->type == GVT_DP_B) || \
+	(intel_vgpu_port(vgpu, port)->type == GVT_DP_C) || \
+	(intel_vgpu_port(vgpu, port)->type == GVT_DP_D))
+
+#define INTEL_GVT_MAX_UEVENT_VARS	3
+
+/* DPCD start */
+#define DPCD_SIZE	0x700
+
+/* DPCD */
+#define DP_SET_POWER            0x600
+#define DP_SET_POWER_D0         0x1
+#define AUX_NATIVE_WRITE        0x8
+#define AUX_NATIVE_READ         0x9
+
+#define AUX_NATIVE_REPLY_MASK   (0x3 << 4)
+#define AUX_NATIVE_REPLY_ACK    (0x0 << 4)
+#define AUX_NATIVE_REPLY_NAK    (0x1 << 4)
+#define AUX_NATIVE_REPLY_DEFER  (0x2 << 4)
+
+#define AUX_BURST_SIZE          20
+
+/* DPCD addresses */
+#define DPCD_REV			0x000
+#define DPCD_MAX_LINK_RATE		0x001
+#define DPCD_MAX_LANE_COUNT		0x002
+
+#define DPCD_TRAINING_PATTERN_SET	0x102
+#define	DPCD_SINK_COUNT			0x200
+#define DPCD_LANE0_1_STATUS		0x202
+#define DPCD_LANE2_3_STATUS		0x203
+#define DPCD_LANE_ALIGN_STATUS_UPDATED	0x204
+#define DPCD_SINK_STATUS		0x205
+
+/* link training */
+#define DPCD_TRAINING_PATTERN_SET_MASK	0x03
+#define DPCD_LINK_TRAINING_DISABLED	0x00
+#define DPCD_TRAINING_PATTERN_1		0x01
+#define DPCD_TRAINING_PATTERN_2		0x02
+
+#define DPCD_CP_READY_MASK		(1 << 6)
+
+/* lane status */
+#define DPCD_LANES_CR_DONE		0x11
+#define DPCD_LANES_EQ_DONE		0x22
+#define DPCD_SYMBOL_LOCKED		0x44
+
+#define DPCD_INTERLANE_ALIGN_DONE	0x01
+
+#define DPCD_SINK_IN_SYNC		0x03
+/* DPCD end */
+
+#define SBI_RESPONSE_MASK               0x3
+#define SBI_RESPONSE_SHIFT              0x1
+#define SBI_STAT_MASK                   0x1
+#define SBI_STAT_SHIFT                  0x0
+#define SBI_OPCODE_SHIFT                8
+#define SBI_OPCODE_MASK			(0xff << SBI_OPCODE_SHIFT)
+#define SBI_CMD_IORD                    2
+#define SBI_CMD_IOWR                    3
+#define SBI_CMD_CRRD                    6
+#define SBI_CMD_CRWR                    7
+#define SBI_ADDR_OFFSET_SHIFT           16
+#define SBI_ADDR_OFFSET_MASK            (0xffff << SBI_ADDR_OFFSET_SHIFT)
+
+struct intel_vgpu_sbi_register {
+	unsigned int offset;
+	u32 value;
+};
+
+struct intel_vgpu_sbi {
+	int number;
+	struct intel_vgpu_sbi_register registers[SBI_REG_MAX];
+};
+
+enum intel_gvt_plane_type {
+	PRIMARY_PLANE = 0,
+	CURSOR_PLANE,
+	SPRITE_PLANE,
+	MAX_PLANE
+};
+
+struct intel_vgpu_dpcd_data {
+	bool data_valid;
+	u8 data[DPCD_SIZE];
+};
+
+enum intel_vgpu_port_type {
+	GVT_CRT = 0,
+	GVT_DP_A,
+	GVT_DP_B,
+	GVT_DP_C,
+	GVT_DP_D,
+	GVT_HDMI_B,
+	GVT_HDMI_C,
+	GVT_HDMI_D,
+	GVT_PORT_MAX
+};
+
+enum intel_vgpu_edid {
+	GVT_EDID_1024_768,
+	GVT_EDID_1920_1200,
+	GVT_EDID_NUM,
+};
+
+#define GVT_DEFAULT_REFRESH_RATE 60
+struct intel_vgpu_port {
+	/* per display EDID information */
+	struct intel_vgpu_edid_data *edid;
+	/* per display DPCD information */
+	struct intel_vgpu_dpcd_data *dpcd;
+	int type;
+	enum intel_vgpu_edid id;
+	/* x1000 to get accurate 59.94, 24.976, 29.94, etc. in timing std. */
+	u32 vrefresh_k;
+};
+
+struct intel_vgpu_vblank_timer {
+	struct hrtimer timer;
+	u32 vrefresh_k;
+	u64 period;
+};
+
+static inline char *vgpu_edid_str(enum intel_vgpu_edid id)
+{
+	switch (id) {
+	case GVT_EDID_1024_768:
+		return "1024x768";
+	case GVT_EDID_1920_1200:
+		return "1920x1200";
+	default:
+		return "";
+	}
+}
+
+static inline unsigned int vgpu_edid_xres(enum intel_vgpu_edid id)
+{
+	switch (id) {
+	case GVT_EDID_1024_768:
+		return 1024;
+	case GVT_EDID_1920_1200:
+		return 1920;
+	default:
+		return 0;
+	}
+}
+
+static inline unsigned int vgpu_edid_yres(enum intel_vgpu_edid id)
+{
+	switch (id) {
+	case GVT_EDID_1024_768:
+		return 768;
+	case GVT_EDID_1920_1200:
+		return 1200;
+	default:
+		return 0;
+	}
+}
+
+void intel_vgpu_emulate_vblank(struct intel_vgpu *vgpu);
+void vgpu_update_vblank_emulation(struct intel_vgpu *vgpu, bool turnon);
+
+int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution);
+void intel_vgpu_reset_display(struct intel_vgpu *vgpu);
+void intel_vgpu_clean_display(struct intel_vgpu *vgpu);
+
+int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
new file mode 100644
index 0000000000..6834f9fe40
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -0,0 +1,592 @@
+/*
+ * Copyright 2017 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Xiaoguang Chen
+ *    Tina Zhang <tina.zhang@intel.com>
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/mdev.h>
+
+#include <drm/drm_fourcc.h>
+#include <drm/drm_plane.h>
+
+#include "gem/i915_gem_dmabuf.h"
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gvt.h"
+
+#define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12))
+
+static int vgpu_gem_get_pages(struct drm_i915_gem_object *obj)
+{
+	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+	struct intel_vgpu *vgpu;
+	struct sg_table *st;
+	struct scatterlist *sg;
+	int i, j, ret;
+	gen8_pte_t __iomem *gtt_entries;
+	struct intel_vgpu_fb_info *fb_info;
+	unsigned int page_num; /* limited by sg_alloc_table */
+
+	if (overflows_type(obj->base.size >> PAGE_SHIFT, page_num))
+		return -E2BIG;
+
+	page_num = obj->base.size >> PAGE_SHIFT;
+	fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info;
+	if (drm_WARN_ON(&dev_priv->drm, !fb_info))
+		return -ENODEV;
+
+	vgpu = fb_info->obj->vgpu;
+	if (drm_WARN_ON(&dev_priv->drm, !vgpu))
+		return -ENODEV;
+
+	st = kmalloc(sizeof(*st), GFP_KERNEL);
+	if (unlikely(!st))
+		return -ENOMEM;
+
+	ret = sg_alloc_table(st, page_num, GFP_KERNEL);
+	if (ret) {
+		kfree(st);
+		return ret;
+	}
+	gtt_entries = (gen8_pte_t __iomem *)to_gt(dev_priv)->ggtt->gsm +
+		(fb_info->start >> PAGE_SHIFT);
+	for_each_sg(st->sgl, sg, page_num, i) {
+		dma_addr_t dma_addr =
+			GEN8_DECODE_PTE(readq(&gtt_entries[i]));
+		if (intel_gvt_dma_pin_guest_page(vgpu, dma_addr)) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		sg->offset = 0;
+		sg->length = PAGE_SIZE;
+		sg_dma_len(sg) = PAGE_SIZE;
+		sg_dma_address(sg) = dma_addr;
+	}
+
+	__i915_gem_object_set_pages(obj, st);
+out:
+	if (ret) {
+		dma_addr_t dma_addr;
+
+		for_each_sg(st->sgl, sg, i, j) {
+			dma_addr = sg_dma_address(sg);
+			if (dma_addr)
+				intel_gvt_dma_unmap_guest_page(vgpu, dma_addr);
+		}
+		sg_free_table(st);
+		kfree(st);
+	}
+
+	return ret;
+
+}
+
+static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj,
+		struct sg_table *pages)
+{
+	struct scatterlist *sg;
+
+	if (obj->base.dma_buf) {
+		struct intel_vgpu_fb_info *fb_info = obj->gvt_info;
+		struct intel_vgpu_dmabuf_obj *obj = fb_info->obj;
+		struct intel_vgpu *vgpu = obj->vgpu;
+		int i;
+
+		for_each_sg(pages->sgl, sg, fb_info->size, i)
+			intel_gvt_dma_unmap_guest_page(vgpu,
+					       sg_dma_address(sg));
+	}
+
+	sg_free_table(pages);
+	kfree(pages);
+}
+
+static void dmabuf_gem_object_free(struct kref *kref)
+{
+	struct intel_vgpu_dmabuf_obj *obj =
+		container_of(kref, struct intel_vgpu_dmabuf_obj, kref);
+	struct intel_vgpu *vgpu = obj->vgpu;
+	struct list_head *pos;
+	struct intel_vgpu_dmabuf_obj *dmabuf_obj;
+
+	if (vgpu && test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status) &&
+	    !list_empty(&vgpu->dmabuf_obj_list_head)) {
+		list_for_each(pos, &vgpu->dmabuf_obj_list_head) {
+			dmabuf_obj = list_entry(pos, struct intel_vgpu_dmabuf_obj, list);
+			if (dmabuf_obj == obj) {
+				list_del(pos);
+				idr_remove(&vgpu->object_idr,
+					   dmabuf_obj->dmabuf_id);
+				kfree(dmabuf_obj->info);
+				kfree(dmabuf_obj);
+				break;
+			}
+		}
+	} else {
+		/* Free the orphan dmabuf_objs here */
+		kfree(obj->info);
+		kfree(obj);
+	}
+}
+
+
+static inline void dmabuf_obj_get(struct intel_vgpu_dmabuf_obj *obj)
+{
+	kref_get(&obj->kref);
+}
+
+static inline void dmabuf_obj_put(struct intel_vgpu_dmabuf_obj *obj)
+{
+	kref_put(&obj->kref, dmabuf_gem_object_free);
+}
+
+static void vgpu_gem_release(struct drm_i915_gem_object *gem_obj)
+{
+
+	struct intel_vgpu_fb_info *fb_info = gem_obj->gvt_info;
+	struct intel_vgpu_dmabuf_obj *obj = fb_info->obj;
+	struct intel_vgpu *vgpu = obj->vgpu;
+
+	if (vgpu) {
+		mutex_lock(&vgpu->dmabuf_lock);
+		gem_obj->base.dma_buf = NULL;
+		dmabuf_obj_put(obj);
+		mutex_unlock(&vgpu->dmabuf_lock);
+	} else {
+		/* vgpu is NULL, as it has been removed already */
+		gem_obj->base.dma_buf = NULL;
+		dmabuf_obj_put(obj);
+	}
+}
+
+static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = {
+	.name = "i915_gem_object_vgpu",
+	.flags = I915_GEM_OBJECT_IS_PROXY,
+	.get_pages = vgpu_gem_get_pages,
+	.put_pages = vgpu_gem_put_pages,
+	.release = vgpu_gem_release,
+};
+
+static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
+		struct intel_vgpu_fb_info *info)
+{
+	static struct lock_class_key lock_class;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct drm_i915_gem_object *obj;
+
+	obj = i915_gem_object_alloc();
+	if (obj == NULL)
+		return NULL;
+
+	drm_gem_private_object_init(dev, &obj->base,
+		roundup(info->size, PAGE_SIZE));
+	i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class, 0);
+	i915_gem_object_set_readonly(obj);
+
+	obj->read_domains = I915_GEM_DOMAIN_GTT;
+	obj->write_domain = 0;
+	if (GRAPHICS_VER(dev_priv) >= 9) {
+		unsigned int tiling_mode = 0;
+		unsigned int stride = 0;
+
+		switch (info->drm_format_mod) {
+		case DRM_FORMAT_MOD_LINEAR:
+			tiling_mode = I915_TILING_NONE;
+			break;
+		case I915_FORMAT_MOD_X_TILED:
+			tiling_mode = I915_TILING_X;
+			stride = info->stride;
+			break;
+		case I915_FORMAT_MOD_Y_TILED:
+		case I915_FORMAT_MOD_Yf_TILED:
+			tiling_mode = I915_TILING_Y;
+			stride = info->stride;
+			break;
+		default:
+			gvt_dbg_core("invalid drm_format_mod %llx for tiling\n",
+				     info->drm_format_mod);
+		}
+		obj->tiling_and_stride = tiling_mode | stride;
+	} else {
+		obj->tiling_and_stride = info->drm_format_mod ?
+					I915_TILING_X : 0;
+	}
+
+	return obj;
+}
+
+static bool validate_hotspot(struct intel_vgpu_cursor_plane_format *c)
+{
+	if (c && c->x_hot <= c->width && c->y_hot <= c->height)
+		return true;
+	else
+		return false;
+}
+
+static int vgpu_get_plane_info(struct drm_device *dev,
+		struct intel_vgpu *vgpu,
+		struct intel_vgpu_fb_info *info,
+		int plane_id)
+{
+	struct intel_vgpu_primary_plane_format p;
+	struct intel_vgpu_cursor_plane_format c;
+	int ret, tile_height = 1;
+
+	memset(info, 0, sizeof(*info));
+
+	if (plane_id == DRM_PLANE_TYPE_PRIMARY) {
+		ret = intel_vgpu_decode_primary_plane(vgpu, &p);
+		if (ret)
+			return ret;
+		info->start = p.base;
+		info->start_gpa = p.base_gpa;
+		info->width = p.width;
+		info->height = p.height;
+		info->stride = p.stride;
+		info->drm_format = p.drm_format;
+
+		switch (p.tiled) {
+		case PLANE_CTL_TILED_LINEAR:
+			info->drm_format_mod = DRM_FORMAT_MOD_LINEAR;
+			break;
+		case PLANE_CTL_TILED_X:
+			info->drm_format_mod = I915_FORMAT_MOD_X_TILED;
+			tile_height = 8;
+			break;
+		case PLANE_CTL_TILED_Y:
+			info->drm_format_mod = I915_FORMAT_MOD_Y_TILED;
+			tile_height = 32;
+			break;
+		case PLANE_CTL_TILED_YF:
+			info->drm_format_mod = I915_FORMAT_MOD_Yf_TILED;
+			tile_height = 32;
+			break;
+		default:
+			gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled);
+		}
+	} else if (plane_id == DRM_PLANE_TYPE_CURSOR) {
+		ret = intel_vgpu_decode_cursor_plane(vgpu, &c);
+		if (ret)
+			return ret;
+		info->start = c.base;
+		info->start_gpa = c.base_gpa;
+		info->width = c.width;
+		info->height = c.height;
+		info->stride = c.width * (c.bpp / 8);
+		info->drm_format = c.drm_format;
+		info->drm_format_mod = 0;
+		info->x_pos = c.x_pos;
+		info->y_pos = c.y_pos;
+
+		if (validate_hotspot(&c)) {
+			info->x_hot = c.x_hot;
+			info->y_hot = c.y_hot;
+		} else {
+			info->x_hot = UINT_MAX;
+			info->y_hot = UINT_MAX;
+		}
+	} else {
+		gvt_vgpu_err("invalid plane id:%d\n", plane_id);
+		return -EINVAL;
+	}
+
+	info->size = info->stride * roundup(info->height, tile_height);
+	if (info->size == 0) {
+		gvt_vgpu_err("fb size is zero\n");
+		return -EINVAL;
+	}
+
+	if (info->start & (PAGE_SIZE - 1)) {
+		gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start);
+		return -EFAULT;
+	}
+
+	if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) {
+		gvt_vgpu_err("invalid gma addr\n");
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static struct intel_vgpu_dmabuf_obj *
+pick_dmabuf_by_info(struct intel_vgpu *vgpu,
+		    struct intel_vgpu_fb_info *latest_info)
+{
+	struct list_head *pos;
+	struct intel_vgpu_fb_info *fb_info;
+	struct intel_vgpu_dmabuf_obj *dmabuf_obj = NULL;
+	struct intel_vgpu_dmabuf_obj *ret = NULL;
+
+	list_for_each(pos, &vgpu->dmabuf_obj_list_head) {
+		dmabuf_obj = list_entry(pos, struct intel_vgpu_dmabuf_obj, list);
+		if (!dmabuf_obj->info)
+			continue;
+
+		fb_info = (struct intel_vgpu_fb_info *)dmabuf_obj->info;
+		if ((fb_info->start == latest_info->start) &&
+		    (fb_info->start_gpa == latest_info->start_gpa) &&
+		    (fb_info->size == latest_info->size) &&
+		    (fb_info->drm_format_mod == latest_info->drm_format_mod) &&
+		    (fb_info->drm_format == latest_info->drm_format) &&
+		    (fb_info->width == latest_info->width) &&
+		    (fb_info->height == latest_info->height)) {
+			ret = dmabuf_obj;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static struct intel_vgpu_dmabuf_obj *
+pick_dmabuf_by_num(struct intel_vgpu *vgpu, u32 id)
+{
+	struct list_head *pos;
+	struct intel_vgpu_dmabuf_obj *dmabuf_obj = NULL;
+	struct intel_vgpu_dmabuf_obj *ret = NULL;
+
+	list_for_each(pos, &vgpu->dmabuf_obj_list_head) {
+		dmabuf_obj = list_entry(pos, struct intel_vgpu_dmabuf_obj, list);
+		if (dmabuf_obj->dmabuf_id == id) {
+			ret = dmabuf_obj;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+static void update_fb_info(struct vfio_device_gfx_plane_info *gvt_dmabuf,
+		      struct intel_vgpu_fb_info *fb_info)
+{
+	gvt_dmabuf->drm_format = fb_info->drm_format;
+	gvt_dmabuf->drm_format_mod = fb_info->drm_format_mod;
+	gvt_dmabuf->width = fb_info->width;
+	gvt_dmabuf->height = fb_info->height;
+	gvt_dmabuf->stride = fb_info->stride;
+	gvt_dmabuf->size = fb_info->size;
+	gvt_dmabuf->x_pos = fb_info->x_pos;
+	gvt_dmabuf->y_pos = fb_info->y_pos;
+	gvt_dmabuf->x_hot = fb_info->x_hot;
+	gvt_dmabuf->y_hot = fb_info->y_hot;
+}
+
+int intel_vgpu_query_plane(struct intel_vgpu *vgpu, void *args)
+{
+	struct drm_device *dev = &vgpu->gvt->gt->i915->drm;
+	struct vfio_device_gfx_plane_info *gfx_plane_info = args;
+	struct intel_vgpu_dmabuf_obj *dmabuf_obj;
+	struct intel_vgpu_fb_info fb_info;
+	int ret = 0;
+
+	if (gfx_plane_info->flags == (VFIO_GFX_PLANE_TYPE_DMABUF |
+				       VFIO_GFX_PLANE_TYPE_PROBE))
+		return ret;
+	else if ((gfx_plane_info->flags & ~VFIO_GFX_PLANE_TYPE_DMABUF) ||
+			(!gfx_plane_info->flags))
+		return -EINVAL;
+
+	ret = vgpu_get_plane_info(dev, vgpu, &fb_info,
+					gfx_plane_info->drm_plane_type);
+	if (ret != 0)
+		goto out;
+
+	mutex_lock(&vgpu->dmabuf_lock);
+	/* If exists, pick up the exposed dmabuf_obj */
+	dmabuf_obj = pick_dmabuf_by_info(vgpu, &fb_info);
+	if (dmabuf_obj) {
+		update_fb_info(gfx_plane_info, &fb_info);
+		gfx_plane_info->dmabuf_id = dmabuf_obj->dmabuf_id;
+
+		/* This buffer may be released between query_plane ioctl and
+		 * get_dmabuf ioctl. Add the refcount to make sure it won't
+		 * be released between the two ioctls.
+		 */
+		if (!dmabuf_obj->initref) {
+			dmabuf_obj->initref = true;
+			dmabuf_obj_get(dmabuf_obj);
+		}
+		ret = 0;
+		gvt_dbg_dpy("vgpu%d: re-use dmabuf_obj ref %d, id %d\n",
+			    vgpu->id, kref_read(&dmabuf_obj->kref),
+			    gfx_plane_info->dmabuf_id);
+		mutex_unlock(&vgpu->dmabuf_lock);
+		goto out;
+	}
+
+	mutex_unlock(&vgpu->dmabuf_lock);
+
+	/* Need to allocate a new one*/
+	dmabuf_obj = kmalloc(sizeof(struct intel_vgpu_dmabuf_obj), GFP_KERNEL);
+	if (unlikely(!dmabuf_obj)) {
+		gvt_vgpu_err("alloc dmabuf_obj failed\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	dmabuf_obj->info = kmalloc(sizeof(struct intel_vgpu_fb_info),
+				   GFP_KERNEL);
+	if (unlikely(!dmabuf_obj->info)) {
+		gvt_vgpu_err("allocate intel vgpu fb info failed\n");
+		ret = -ENOMEM;
+		goto out_free_dmabuf;
+	}
+	memcpy(dmabuf_obj->info, &fb_info, sizeof(struct intel_vgpu_fb_info));
+
+	((struct intel_vgpu_fb_info *)dmabuf_obj->info)->obj = dmabuf_obj;
+
+	dmabuf_obj->vgpu = vgpu;
+
+	ret = idr_alloc(&vgpu->object_idr, dmabuf_obj, 1, 0, GFP_NOWAIT);
+	if (ret < 0)
+		goto out_free_info;
+	gfx_plane_info->dmabuf_id = ret;
+	dmabuf_obj->dmabuf_id = ret;
+
+	dmabuf_obj->initref = true;
+
+	kref_init(&dmabuf_obj->kref);
+
+	update_fb_info(gfx_plane_info, &fb_info);
+
+	INIT_LIST_HEAD(&dmabuf_obj->list);
+	mutex_lock(&vgpu->dmabuf_lock);
+	list_add_tail(&dmabuf_obj->list, &vgpu->dmabuf_obj_list_head);
+	mutex_unlock(&vgpu->dmabuf_lock);
+
+	gvt_dbg_dpy("vgpu%d: %s new dmabuf_obj ref %d, id %d\n", vgpu->id,
+		    __func__, kref_read(&dmabuf_obj->kref), ret);
+
+	return 0;
+
+out_free_info:
+	kfree(dmabuf_obj->info);
+out_free_dmabuf:
+	kfree(dmabuf_obj);
+out:
+	/* ENODEV means plane isn't ready, which might be a normal case. */
+	return (ret == -ENODEV) ? 0 : ret;
+}
+
+/* To associate an exposed dmabuf with the dmabuf_obj */
+int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id)
+{
+	struct drm_device *dev = &vgpu->gvt->gt->i915->drm;
+	struct intel_vgpu_dmabuf_obj *dmabuf_obj;
+	struct drm_i915_gem_object *obj;
+	struct dma_buf *dmabuf;
+	int dmabuf_fd;
+	int ret = 0;
+
+	mutex_lock(&vgpu->dmabuf_lock);
+
+	dmabuf_obj = pick_dmabuf_by_num(vgpu, dmabuf_id);
+	if (dmabuf_obj == NULL) {
+		gvt_vgpu_err("invalid dmabuf id:%d\n", dmabuf_id);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	obj = vgpu_create_gem(dev, dmabuf_obj->info);
+	if (obj == NULL) {
+		gvt_vgpu_err("create gvt gem obj failed\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	obj->gvt_info = dmabuf_obj->info;
+
+	dmabuf = i915_gem_prime_export(&obj->base, DRM_CLOEXEC | DRM_RDWR);
+	if (IS_ERR(dmabuf)) {
+		gvt_vgpu_err("export dma-buf failed\n");
+		ret = PTR_ERR(dmabuf);
+		goto out_free_gem;
+	}
+
+	ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR);
+	if (ret < 0) {
+		gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret);
+		goto out_free_dmabuf;
+	}
+	dmabuf_fd = ret;
+
+	dmabuf_obj_get(dmabuf_obj);
+
+	if (dmabuf_obj->initref) {
+		dmabuf_obj->initref = false;
+		dmabuf_obj_put(dmabuf_obj);
+	}
+
+	mutex_unlock(&vgpu->dmabuf_lock);
+
+	gvt_dbg_dpy("vgpu%d: dmabuf:%d, dmabuf ref %d, fd:%d\n"
+		    "        file count: %ld, GEM ref: %d\n",
+		    vgpu->id, dmabuf_obj->dmabuf_id,
+		    kref_read(&dmabuf_obj->kref),
+		    dmabuf_fd,
+		    file_count(dmabuf->file),
+		    kref_read(&obj->base.refcount));
+
+	i915_gem_object_put(obj);
+
+	return dmabuf_fd;
+
+out_free_dmabuf:
+	dma_buf_put(dmabuf);
+out_free_gem:
+	i915_gem_object_put(obj);
+out:
+	mutex_unlock(&vgpu->dmabuf_lock);
+	return ret;
+}
+
+void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu)
+{
+	struct list_head *pos, *n;
+	struct intel_vgpu_dmabuf_obj *dmabuf_obj;
+
+	mutex_lock(&vgpu->dmabuf_lock);
+	list_for_each_safe(pos, n, &vgpu->dmabuf_obj_list_head) {
+		dmabuf_obj = list_entry(pos, struct intel_vgpu_dmabuf_obj, list);
+		dmabuf_obj->vgpu = NULL;
+
+		idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id);
+		list_del(pos);
+
+		/* dmabuf_obj might be freed in dmabuf_obj_put */
+		if (dmabuf_obj->initref) {
+			dmabuf_obj->initref = false;
+			dmabuf_obj_put(dmabuf_obj);
+		}
+
+	}
+	mutex_unlock(&vgpu->dmabuf_lock);
+}
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.h b/drivers/gpu/drm/i915/gvt/dmabuf.h
new file mode 100644
index 0000000000..3dcdb6570e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright(c) 2017 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Xiaoguang Chen
+ *    Tina Zhang <tina.zhang@intel.com>
+ */
+
+#ifndef _GVT_DMABUF_H_
+#define _GVT_DMABUF_H_
+#include <linux/vfio.h>
+
+struct intel_vgpu_fb_info {
+	__u64 start;
+	__u64 start_gpa;
+	__u64 drm_format_mod;
+	__u32 drm_format;	/* drm format of plane */
+	__u32 width;	/* width of plane */
+	__u32 height;	/* height of plane */
+	__u32 stride;	/* stride of plane */
+	__u32 size;	/* size of plane in bytes, align on page */
+	__u32 x_pos;	/* horizontal position of cursor plane */
+	__u32 y_pos;	/* vertical position of cursor plane */
+	__u32 x_hot;    /* horizontal position of cursor hotspot */
+	__u32 y_hot;    /* vertical position of cursor hotspot */
+	struct intel_vgpu_dmabuf_obj *obj;
+};
+
+/*
+ * struct intel_vgpu_dmabuf_obj- Intel vGPU device buffer object
+ */
+struct intel_vgpu_dmabuf_obj {
+	struct intel_vgpu *vgpu;
+	struct intel_vgpu_fb_info *info;
+	__u32 dmabuf_id;
+	struct kref kref;
+	bool initref;
+	struct list_head list;
+};
+
+int intel_vgpu_query_plane(struct intel_vgpu *vgpu, void *args);
+int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id);
+void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c
new file mode 100644
index 0000000000..af9afdb53c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/edid.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Ke Yu
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Terrence Xu <terrence.xu@intel.com>
+ *    Changbin Du <changbin.du@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#include "display/intel_dp_aux_regs.h"
+#include "display/intel_gmbus_regs.h"
+#include "gvt.h"
+#include "i915_drv.h"
+#include "i915_reg.h"
+
+#define GMBUS1_TOTAL_BYTES_SHIFT 16
+#define GMBUS1_TOTAL_BYTES_MASK 0x1ff
+#define gmbus1_total_byte_count(v) (((v) >> \
+	GMBUS1_TOTAL_BYTES_SHIFT) & GMBUS1_TOTAL_BYTES_MASK)
+#define gmbus1_slave_addr(v) (((v) & 0xff) >> 1)
+#define gmbus1_slave_index(v) (((v) >> 8) & 0xff)
+#define gmbus1_bus_cycle(v) (((v) >> 25) & 0x7)
+
+/* GMBUS0 bits definitions */
+#define _GMBUS_PIN_SEL_MASK     (0x7)
+
+static unsigned char edid_get_byte(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_i2c_edid *edid = &vgpu->display.i2c_edid;
+	unsigned char chr = 0;
+
+	if (edid->state == I2C_NOT_SPECIFIED || !edid->slave_selected) {
+		gvt_vgpu_err("Driver tries to read EDID without proper sequence!\n");
+		return 0;
+	}
+	if (edid->current_edid_read >= EDID_SIZE) {
+		gvt_vgpu_err("edid_get_byte() exceeds the size of EDID!\n");
+		return 0;
+	}
+
+	if (!edid->edid_available) {
+		gvt_vgpu_err("Reading EDID but EDID is not available!\n");
+		return 0;
+	}
+
+	if (intel_vgpu_has_monitor_on_port(vgpu, edid->port)) {
+		struct intel_vgpu_edid_data *edid_data =
+			intel_vgpu_port(vgpu, edid->port)->edid;
+
+		chr = edid_data->edid_block[edid->current_edid_read];
+		edid->current_edid_read++;
+	} else {
+		gvt_vgpu_err("No EDID available during the reading?\n");
+	}
+	return chr;
+}
+
+static inline int cnp_get_port_from_gmbus0(u32 gmbus0)
+{
+	int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK;
+	int port = -EINVAL;
+
+	if (port_select == GMBUS_PIN_1_BXT)
+		port = PORT_B;
+	else if (port_select == GMBUS_PIN_2_BXT)
+		port = PORT_C;
+	else if (port_select == GMBUS_PIN_3_BXT)
+		port = PORT_D;
+	else if (port_select == GMBUS_PIN_4_CNP)
+		port = PORT_E;
+	return port;
+}
+
+static inline int bxt_get_port_from_gmbus0(u32 gmbus0)
+{
+	int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK;
+	int port = -EINVAL;
+
+	if (port_select == GMBUS_PIN_1_BXT)
+		port = PORT_B;
+	else if (port_select == GMBUS_PIN_2_BXT)
+		port = PORT_C;
+	else if (port_select == GMBUS_PIN_3_BXT)
+		port = PORT_D;
+	return port;
+}
+
+static inline int get_port_from_gmbus0(u32 gmbus0)
+{
+	int port_select = gmbus0 & _GMBUS_PIN_SEL_MASK;
+	int port = -EINVAL;
+
+	if (port_select == GMBUS_PIN_VGADDC)
+		port = PORT_E;
+	else if (port_select == GMBUS_PIN_DPC)
+		port = PORT_C;
+	else if (port_select == GMBUS_PIN_DPB)
+		port = PORT_B;
+	else if (port_select == GMBUS_PIN_DPD)
+		port = PORT_D;
+	return port;
+}
+
+static void reset_gmbus_controller(struct intel_vgpu *vgpu)
+{
+	vgpu_vreg_t(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY;
+	if (!vgpu->display.i2c_edid.edid_available)
+		vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER;
+	vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE;
+}
+
+/* GMBUS0 */
+static int gmbus0_mmio_write(struct intel_vgpu *vgpu,
+			unsigned int offset, void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	int port, pin_select;
+
+	memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes);
+
+	pin_select = vgpu_vreg(vgpu, offset) & _GMBUS_PIN_SEL_MASK;
+
+	intel_vgpu_init_i2c_edid(vgpu);
+
+	if (pin_select == 0)
+		return 0;
+
+	if (IS_BROXTON(i915))
+		port = bxt_get_port_from_gmbus0(pin_select);
+	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
+		port = cnp_get_port_from_gmbus0(pin_select);
+	else
+		port = get_port_from_gmbus0(pin_select);
+	if (drm_WARN_ON(&i915->drm, port < 0))
+		return 0;
+
+	vgpu->display.i2c_edid.state = I2C_GMBUS;
+	vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE;
+
+	vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE;
+	vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE;
+
+	if (intel_vgpu_has_monitor_on_port(vgpu, port) &&
+			!intel_vgpu_port_is_dp(vgpu, port)) {
+		vgpu->display.i2c_edid.port = port;
+		vgpu->display.i2c_edid.edid_available = true;
+		vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER;
+	} else
+		vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER;
+	return 0;
+}
+
+static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	struct intel_vgpu_i2c_edid *i2c_edid = &vgpu->display.i2c_edid;
+	u32 slave_addr;
+	u32 wvalue = *(u32 *)p_data;
+
+	if (vgpu_vreg(vgpu, offset) & GMBUS_SW_CLR_INT) {
+		if (!(wvalue & GMBUS_SW_CLR_INT)) {
+			vgpu_vreg(vgpu, offset) &= ~GMBUS_SW_CLR_INT;
+			reset_gmbus_controller(vgpu);
+		}
+		/*
+		 * TODO: "This bit is cleared to zero when an event
+		 * causes the HW_RDY bit transition to occur "
+		 */
+	} else {
+		/*
+		 * per bspec setting this bit can cause:
+		 * 1) INT status bit cleared
+		 * 2) HW_RDY bit asserted
+		 */
+		if (wvalue & GMBUS_SW_CLR_INT) {
+			vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_INT;
+			vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY;
+		}
+
+		/* For virtualization, we suppose that HW is always ready,
+		 * so GMBUS_SW_RDY should always be cleared
+		 */
+		if (wvalue & GMBUS_SW_RDY)
+			wvalue &= ~GMBUS_SW_RDY;
+
+		i2c_edid->gmbus.total_byte_count =
+			gmbus1_total_byte_count(wvalue);
+		slave_addr = gmbus1_slave_addr(wvalue);
+
+		/* vgpu gmbus only support EDID */
+		if (slave_addr == EDID_ADDR) {
+			i2c_edid->slave_selected = true;
+		} else if (slave_addr != 0) {
+			gvt_dbg_dpy(
+				"vgpu%d: unsupported gmbus slave addr(0x%x)\n"
+				"	gmbus operations will be ignored.\n",
+					vgpu->id, slave_addr);
+		}
+
+		if (wvalue & GMBUS_CYCLE_INDEX)
+			i2c_edid->current_edid_read =
+				gmbus1_slave_index(wvalue);
+
+		i2c_edid->gmbus.cycle_type = gmbus1_bus_cycle(wvalue);
+		switch (gmbus1_bus_cycle(wvalue)) {
+		case GMBUS_NOCYCLE:
+			break;
+		case GMBUS_STOP:
+			/* From spec:
+			 * This can only cause a STOP to be generated
+			 * if a GMBUS cycle is generated, the GMBUS is
+			 * currently in a data/wait/idle phase, or it is in a
+			 * WAIT phase
+			 */
+			if (gmbus1_bus_cycle(vgpu_vreg(vgpu, offset))
+				!= GMBUS_NOCYCLE) {
+				intel_vgpu_init_i2c_edid(vgpu);
+				/* After the 'stop' cycle, hw state would become
+				 * 'stop phase' and then 'idle phase' after a
+				 * few milliseconds. In emulation, we just set
+				 * it as 'idle phase' ('stop phase' is not
+				 * visible in gmbus interface)
+				 */
+				i2c_edid->gmbus.phase = GMBUS_IDLE_PHASE;
+				vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE;
+			}
+			break;
+		case NIDX_NS_W:
+		case IDX_NS_W:
+		case NIDX_STOP:
+		case IDX_STOP:
+			/* From hw spec the GMBUS phase
+			 * transition like this:
+			 * START (-->INDEX) -->DATA
+			 */
+			i2c_edid->gmbus.phase = GMBUS_DATA_PHASE;
+			vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE;
+			break;
+		default:
+			gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n");
+			break;
+		}
+		/*
+		 * From hw spec the WAIT state will be
+		 * cleared:
+		 * (1) in a new GMBUS cycle
+		 * (2) by generating a stop
+		 */
+		vgpu_vreg(vgpu, offset) = wvalue;
+	}
+	return 0;
+}
+
+static int gmbus3_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+
+	drm_WARN_ON(&i915->drm, 1);
+	return 0;
+}
+
+static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	int i;
+	unsigned char byte_data;
+	struct intel_vgpu_i2c_edid *i2c_edid = &vgpu->display.i2c_edid;
+	int byte_left = i2c_edid->gmbus.total_byte_count -
+				i2c_edid->current_edid_read;
+	int byte_count = byte_left;
+	u32 reg_data = 0;
+
+	/* Data can only be recevied if previous settings correct */
+	if (vgpu_vreg_t(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) {
+		if (byte_left <= 0) {
+			memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
+			return 0;
+		}
+
+		if (byte_count > 4)
+			byte_count = 4;
+		for (i = 0; i < byte_count; i++) {
+			byte_data = edid_get_byte(vgpu);
+			reg_data |= (byte_data << (i << 3));
+		}
+
+		memcpy(&vgpu_vreg(vgpu, offset), &reg_data, byte_count);
+		memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
+
+		if (byte_left <= 4) {
+			switch (i2c_edid->gmbus.cycle_type) {
+			case NIDX_STOP:
+			case IDX_STOP:
+				i2c_edid->gmbus.phase = GMBUS_IDLE_PHASE;
+				break;
+			case NIDX_NS_W:
+			case IDX_NS_W:
+			default:
+				i2c_edid->gmbus.phase = GMBUS_WAIT_PHASE;
+				break;
+			}
+			intel_vgpu_init_i2c_edid(vgpu);
+		}
+		/*
+		 * Read GMBUS3 during send operation,
+		 * return the latest written value
+		 */
+	} else {
+		memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
+		gvt_vgpu_err("warning: gmbus3 read with nothing returned\n");
+	}
+	return 0;
+}
+
+static int gmbus2_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 value = vgpu_vreg(vgpu, offset);
+
+	if (!(vgpu_vreg(vgpu, offset) & GMBUS_INUSE))
+		vgpu_vreg(vgpu, offset) |= GMBUS_INUSE;
+	memcpy(p_data, (void *)&value, bytes);
+	return 0;
+}
+
+static int gmbus2_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 wvalue = *(u32 *)p_data;
+
+	if (wvalue & GMBUS_INUSE)
+		vgpu_vreg(vgpu, offset) &= ~GMBUS_INUSE;
+	/* All other bits are read-only */
+	return 0;
+}
+
+/**
+ * intel_gvt_i2c_handle_gmbus_read - emulate gmbus register mmio read
+ * @vgpu: a vGPU
+ * @offset: reg offset
+ * @p_data: data return buffer
+ * @bytes: access data length
+ *
+ * This function is used to emulate gmbus register mmio read
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_gvt_i2c_handle_gmbus_read(struct intel_vgpu *vgpu,
+	unsigned int offset, void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+
+	if (drm_WARN_ON(&i915->drm, bytes > 8 && (offset & (bytes - 1))))
+		return -EINVAL;
+
+	if (offset == i915_mmio_reg_offset(PCH_GMBUS2))
+		return gmbus2_mmio_read(vgpu, offset, p_data, bytes);
+	else if (offset == i915_mmio_reg_offset(PCH_GMBUS3))
+		return gmbus3_mmio_read(vgpu, offset, p_data, bytes);
+
+	memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
+	return 0;
+}
+
+/**
+ * intel_gvt_i2c_handle_gmbus_write - emulate gmbus register mmio write
+ * @vgpu: a vGPU
+ * @offset: reg offset
+ * @p_data: data return buffer
+ * @bytes: access data length
+ *
+ * This function is used to emulate gmbus register mmio write
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_gvt_i2c_handle_gmbus_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+
+	if (drm_WARN_ON(&i915->drm, bytes > 8 && (offset & (bytes - 1))))
+		return -EINVAL;
+
+	if (offset == i915_mmio_reg_offset(PCH_GMBUS0))
+		return gmbus0_mmio_write(vgpu, offset, p_data, bytes);
+	else if (offset == i915_mmio_reg_offset(PCH_GMBUS1))
+		return gmbus1_mmio_write(vgpu, offset, p_data, bytes);
+	else if (offset == i915_mmio_reg_offset(PCH_GMBUS2))
+		return gmbus2_mmio_write(vgpu, offset, p_data, bytes);
+	else if (offset == i915_mmio_reg_offset(PCH_GMBUS3))
+		return gmbus3_mmio_write(vgpu, offset, p_data, bytes);
+
+	memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes);
+	return 0;
+}
+
+enum {
+	AUX_CH_CTL = 0,
+	AUX_CH_DATA1,
+	AUX_CH_DATA2,
+	AUX_CH_DATA3,
+	AUX_CH_DATA4,
+	AUX_CH_DATA5
+};
+
+static inline int get_aux_ch_reg(unsigned int offset)
+{
+	int reg;
+
+	switch (offset & 0xff) {
+	case 0x10:
+		reg = AUX_CH_CTL;
+		break;
+	case 0x14:
+		reg = AUX_CH_DATA1;
+		break;
+	case 0x18:
+		reg = AUX_CH_DATA2;
+		break;
+	case 0x1c:
+		reg = AUX_CH_DATA3;
+		break;
+	case 0x20:
+		reg = AUX_CH_DATA4;
+		break;
+	case 0x24:
+		reg = AUX_CH_DATA5;
+		break;
+	default:
+		reg = -1;
+		break;
+	}
+	return reg;
+}
+
+/**
+ * intel_gvt_i2c_handle_aux_ch_write - emulate AUX channel register write
+ * @vgpu: a vGPU
+ * @port_idx: port index
+ * @offset: reg offset
+ * @p_data: write ptr
+ *
+ * This function is used to emulate AUX channel register write
+ *
+ */
+void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
+				int port_idx,
+				unsigned int offset,
+				void *p_data)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_vgpu_i2c_edid *i2c_edid = &vgpu->display.i2c_edid;
+	int msg_length, ret_msg_size;
+	int msg, addr, ctrl, op;
+	u32 value = *(u32 *)p_data;
+	int aux_data_for_write = 0;
+	int reg = get_aux_ch_reg(offset);
+
+	if (reg != AUX_CH_CTL) {
+		vgpu_vreg(vgpu, offset) = value;
+		return;
+	}
+
+	msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, value);
+
+	// check the msg in DATA register.
+	msg = vgpu_vreg(vgpu, offset + 4);
+	addr = (msg >> 8) & 0xffff;
+	ctrl = (msg >> 24) & 0xff;
+	op = ctrl >> 4;
+	if (!(value & DP_AUX_CH_CTL_SEND_BUSY)) {
+		/* The ctl write to clear some states */
+		return;
+	}
+
+	/* Always set the wanted value for vms. */
+	ret_msg_size = (((op & 0x1) == GVT_AUX_I2C_READ) ? 2 : 1);
+	vgpu_vreg(vgpu, offset) =
+		DP_AUX_CH_CTL_DONE |
+		DP_AUX_CH_CTL_MESSAGE_SIZE(ret_msg_size);
+
+	if (msg_length == 3) {
+		if (!(op & GVT_AUX_I2C_MOT)) {
+			/* stop */
+			intel_vgpu_init_i2c_edid(vgpu);
+		} else {
+			/* start or restart */
+			i2c_edid->aux_ch.i2c_over_aux_ch = true;
+			i2c_edid->aux_ch.aux_ch_mot = true;
+			if (addr == 0) {
+				/* reset the address */
+				intel_vgpu_init_i2c_edid(vgpu);
+			} else if (addr == EDID_ADDR) {
+				i2c_edid->state = I2C_AUX_CH;
+				i2c_edid->port = port_idx;
+				i2c_edid->slave_selected = true;
+				if (intel_vgpu_has_monitor_on_port(vgpu,
+					port_idx) &&
+					intel_vgpu_port_is_dp(vgpu, port_idx))
+					i2c_edid->edid_available = true;
+			}
+		}
+	} else if ((op & 0x1) == GVT_AUX_I2C_WRITE) {
+		/* TODO
+		 * We only support EDID reading from I2C_over_AUX. And
+		 * we do not expect the index mode to be used. Right now
+		 * the WRITE operation is ignored. It is good enough to
+		 * support the gfx driver to do EDID access.
+		 */
+	} else {
+		if (drm_WARN_ON(&i915->drm, (op & 0x1) != GVT_AUX_I2C_READ))
+			return;
+		if (drm_WARN_ON(&i915->drm, msg_length != 4))
+			return;
+		if (i2c_edid->edid_available && i2c_edid->slave_selected) {
+			unsigned char val = edid_get_byte(vgpu);
+
+			aux_data_for_write = (val << 16);
+		} else
+			aux_data_for_write = (0xff << 16);
+	}
+	/* write the return value in AUX_CH_DATA reg which includes:
+	 * ACK of I2C_WRITE
+	 * returned byte if it is READ
+	 */
+	aux_data_for_write |= GVT_AUX_I2C_REPLY_ACK << 24;
+	vgpu_vreg(vgpu, offset + 4) = aux_data_for_write;
+}
+
+/**
+ * intel_vgpu_init_i2c_edid - initialize vGPU i2c edid emulation
+ * @vgpu: a vGPU
+ *
+ * This function is used to initialize vGPU i2c edid emulation stuffs
+ *
+ */
+void intel_vgpu_init_i2c_edid(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_i2c_edid *edid = &vgpu->display.i2c_edid;
+
+	edid->state = I2C_NOT_SPECIFIED;
+
+	edid->port = -1;
+	edid->slave_selected = false;
+	edid->edid_available = false;
+	edid->current_edid_read = 0;
+
+	memset(&edid->gmbus, 0, sizeof(struct intel_vgpu_i2c_gmbus));
+
+	edid->aux_ch.i2c_over_aux_ch = false;
+	edid->aux_ch.aux_ch_mot = false;
+}
diff --git a/drivers/gpu/drm/i915/gvt/edid.h b/drivers/gpu/drm/i915/gvt/edid.h
new file mode 100644
index 0000000000..dfe0cbc6aa
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/edid.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Ke Yu
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Terrence Xu <terrence.xu@intel.com>
+ *    Changbin Du <changbin.du@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#ifndef _GVT_EDID_H_
+#define _GVT_EDID_H_
+
+#include <linux/types.h>
+
+struct intel_vgpu;
+
+#define EDID_SIZE		128
+#define EDID_ADDR		0x50 /* Linux hvm EDID addr */
+
+#define GVT_AUX_NATIVE_WRITE			0x8
+#define GVT_AUX_NATIVE_READ			0x9
+#define GVT_AUX_I2C_WRITE			0x0
+#define GVT_AUX_I2C_READ			0x1
+#define GVT_AUX_I2C_STATUS			0x2
+#define GVT_AUX_I2C_MOT				0x4
+#define GVT_AUX_I2C_REPLY_ACK			0x0
+
+struct intel_vgpu_edid_data {
+	bool data_valid;
+	unsigned char edid_block[EDID_SIZE];
+};
+
+enum gmbus_cycle_type {
+	GMBUS_NOCYCLE	= 0x0,
+	NIDX_NS_W	= 0x1,
+	IDX_NS_W	= 0x3,
+	GMBUS_STOP	= 0x4,
+	NIDX_STOP	= 0x5,
+	IDX_STOP	= 0x7
+};
+
+/*
+ * States of GMBUS
+ *
+ * GMBUS0-3 could be related to the EDID virtualization. Another two GMBUS
+ * registers, GMBUS4 (interrupt mask) and GMBUS5 (2 byte indes register), are
+ * not considered here. Below describes the usage of GMBUS registers that are
+ * cared by the EDID virtualization
+ *
+ * GMBUS0:
+ *      R/W
+ *      port selection. value of bit0 - bit2 corresponds to the GPIO registers.
+ *
+ * GMBUS1:
+ *      R/W Protect
+ *      Command and Status.
+ *      bit0 is the direction bit: 1 is read; 0 is write.
+ *      bit1 - bit7 is slave 7-bit address.
+ *      bit16 - bit24 total byte count (ignore?)
+ *
+ * GMBUS2:
+ *      Most of bits are read only except bit 15 (IN_USE)
+ *      Status register
+ *      bit0 - bit8 current byte count
+ *      bit 11: hardware ready;
+ *
+ * GMBUS3:
+ *      Read/Write
+ *      Data for transfer
+ */
+
+/* From hw specs, Other phases like START, ADDRESS, INDEX
+ * are invisible to GMBUS MMIO interface. So no definitions
+ * in below enum types
+ */
+enum gvt_gmbus_phase {
+	GMBUS_IDLE_PHASE = 0,
+	GMBUS_DATA_PHASE,
+	GMBUS_WAIT_PHASE,
+	//GMBUS_STOP_PHASE,
+	GMBUS_MAX_PHASE
+};
+
+struct intel_vgpu_i2c_gmbus {
+	unsigned int total_byte_count; /* from GMBUS1 */
+	enum gmbus_cycle_type cycle_type;
+	enum gvt_gmbus_phase phase;
+};
+
+struct intel_vgpu_i2c_aux_ch {
+	bool i2c_over_aux_ch;
+	bool aux_ch_mot;
+};
+
+enum i2c_state {
+	I2C_NOT_SPECIFIED = 0,
+	I2C_GMBUS = 1,
+	I2C_AUX_CH = 2
+};
+
+/* I2C sequences cannot interleave.
+ * GMBUS and AUX_CH sequences cannot interleave.
+ */
+struct intel_vgpu_i2c_edid {
+	enum i2c_state state;
+
+	unsigned int port;
+	bool slave_selected;
+	bool edid_available;
+	unsigned int current_edid_read;
+
+	struct intel_vgpu_i2c_gmbus gmbus;
+	struct intel_vgpu_i2c_aux_ch aux_ch;
+};
+
+void intel_vgpu_init_i2c_edid(struct intel_vgpu *vgpu);
+
+int intel_gvt_i2c_handle_gmbus_read(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes);
+
+int intel_gvt_i2c_handle_gmbus_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes);
+
+void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
+		int port_idx,
+		unsigned int offset,
+		void *p_data);
+
+#endif /*_GVT_EDID_H_*/
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
new file mode 100644
index 0000000000..274c6ef424
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -0,0 +1,558 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "gvt.h"
+
+#define _EL_OFFSET_STATUS       0x234
+#define _EL_OFFSET_STATUS_BUF   0x370
+#define _EL_OFFSET_STATUS_PTR   0x3A0
+
+#define execlist_ring_mmio(e, offset) ((e)->mmio_base + (offset))
+
+#define valid_context(ctx) ((ctx)->valid)
+#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
+		((a)->lrca == (b)->lrca))
+
+static int context_switch_events[] = {
+	[RCS0]  = RCS_AS_CONTEXT_SWITCH,
+	[BCS0]  = BCS_AS_CONTEXT_SWITCH,
+	[VCS0]  = VCS_AS_CONTEXT_SWITCH,
+	[VCS1]  = VCS2_AS_CONTEXT_SWITCH,
+	[VECS0] = VECS_AS_CONTEXT_SWITCH,
+};
+
+static int to_context_switch_event(const struct intel_engine_cs *engine)
+{
+	if (WARN_ON(engine->id >= ARRAY_SIZE(context_switch_events)))
+		return -EINVAL;
+
+	return context_switch_events[engine->id];
+}
+
+static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
+{
+	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
+			execlist->running_slot ?
+			execlist->running_slot->index : -1,
+			execlist->running_context ?
+			execlist->running_context->context_id : 0,
+			execlist->pending_slot ?
+			execlist->pending_slot->index : -1);
+
+	execlist->running_slot = execlist->pending_slot;
+	execlist->pending_slot = NULL;
+	execlist->running_context = execlist->running_context ?
+		&execlist->running_slot->ctx[0] : NULL;
+
+	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
+			execlist->running_slot ?
+			execlist->running_slot->index : -1,
+			execlist->running_context ?
+			execlist->running_context->context_id : 0,
+			execlist->pending_slot ?
+			execlist->pending_slot->index : -1);
+}
+
+static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
+{
+	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
+	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
+	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
+	struct intel_vgpu *vgpu = execlist->vgpu;
+	struct execlist_status_format status;
+	u32 status_reg =
+		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS);
+
+	status.ldw = vgpu_vreg(vgpu, status_reg);
+	status.udw = vgpu_vreg(vgpu, status_reg + 4);
+
+	if (running) {
+		status.current_execlist_pointer = !!running->index;
+		status.execlist_write_pointer = !!!running->index;
+		status.execlist_0_active = status.execlist_0_valid =
+			!!!(running->index);
+		status.execlist_1_active = status.execlist_1_valid =
+			!!(running->index);
+	} else {
+		status.context_id = 0;
+		status.execlist_0_active = status.execlist_0_valid = 0;
+		status.execlist_1_active = status.execlist_1_valid = 0;
+	}
+
+	status.context_id = desc ? desc->context_id : 0;
+	status.execlist_queue_full = !!(pending);
+
+	vgpu_vreg(vgpu, status_reg) = status.ldw;
+	vgpu_vreg(vgpu, status_reg + 4) = status.udw;
+
+	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
+		vgpu->id, status_reg, status.ldw, status.udw);
+}
+
+static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
+			       struct execlist_context_status_format *status,
+			       bool trigger_interrupt_later)
+{
+	struct intel_vgpu *vgpu = execlist->vgpu;
+	struct execlist_context_status_pointer_format ctx_status_ptr;
+	u32 write_pointer;
+	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
+	unsigned long hwsp_gpa;
+
+	ctx_status_ptr_reg =
+		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS_PTR);
+	ctx_status_buf_reg =
+		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS_BUF);
+
+	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
+
+	write_pointer = ctx_status_ptr.write_ptr;
+
+	if (write_pointer == 0x7)
+		write_pointer = 0;
+	else {
+		++write_pointer;
+		write_pointer %= 0x6;
+	}
+
+	offset = ctx_status_buf_reg + write_pointer * 8;
+
+	vgpu_vreg(vgpu, offset) = status->ldw;
+	vgpu_vreg(vgpu, offset + 4) = status->udw;
+
+	ctx_status_ptr.write_ptr = write_pointer;
+	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
+
+	/* Update the CSB and CSB write pointer in HWSP */
+	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
+					 vgpu->hws_pga[execlist->engine->id]);
+	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
+		intel_gvt_write_gpa(vgpu,
+			hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + write_pointer * 8,
+			status, 8);
+		intel_gvt_write_gpa(vgpu,
+			hwsp_gpa + INTEL_HWS_CSB_WRITE_INDEX(execlist->engine->i915) * 4,
+			&write_pointer, 4);
+	}
+
+	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
+		   vgpu->id, write_pointer, offset, status->ldw, status->udw);
+
+	if (trigger_interrupt_later)
+		return;
+
+	intel_vgpu_trigger_virtual_event(vgpu,
+					 to_context_switch_event(execlist->engine));
+}
+
+static int emulate_execlist_ctx_schedule_out(
+		struct intel_vgpu_execlist *execlist,
+		struct execlist_ctx_descriptor_format *ctx)
+{
+	struct intel_vgpu *vgpu = execlist->vgpu;
+	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
+	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
+	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
+	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
+	struct execlist_context_status_format status;
+
+	memset(&status, 0, sizeof(status));
+
+	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);
+
+	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
+		gvt_vgpu_err("schedule out context is not running context,"
+				"ctx id %x running ctx id %x\n",
+				ctx->context_id,
+				execlist->running_context->context_id);
+		return -EINVAL;
+	}
+
+	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
+	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
+		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");
+
+		execlist->running_context = ctx1;
+
+		emulate_execlist_status(execlist);
+
+		status.context_complete = status.element_switch = 1;
+		status.context_id = ctx->context_id;
+
+		emulate_csb_update(execlist, &status, false);
+		/*
+		 * ctx1 is not valid, ctx == ctx0
+		 * ctx1 is valid, ctx1 == ctx
+		 *	--> last element is finished
+		 * emulate:
+		 *	active-to-idle if there is *no* pending execlist
+		 *	context-complete if there *is* pending execlist
+		 */
+	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
+			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
+		gvt_dbg_el("need to switch virtual execlist slot\n");
+
+		switch_virtual_execlist_slot(execlist);
+
+		emulate_execlist_status(execlist);
+
+		status.context_complete = status.active_to_idle = 1;
+		status.context_id = ctx->context_id;
+
+		if (!pending) {
+			emulate_csb_update(execlist, &status, false);
+		} else {
+			emulate_csb_update(execlist, &status, true);
+
+			memset(&status, 0, sizeof(status));
+
+			status.idle_to_active = 1;
+			status.context_id = 0;
+
+			emulate_csb_update(execlist, &status, false);
+		}
+	} else {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
+		struct intel_vgpu_execlist *execlist)
+{
+	struct intel_vgpu *vgpu = execlist->vgpu;
+	u32 status_reg =
+		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS);
+	struct execlist_status_format status;
+
+	status.ldw = vgpu_vreg(vgpu, status_reg);
+	status.udw = vgpu_vreg(vgpu, status_reg + 4);
+
+	if (status.execlist_queue_full) {
+		gvt_vgpu_err("virtual execlist slots are full\n");
+		return NULL;
+	}
+
+	return &execlist->slot[status.execlist_write_pointer];
+}
+
+static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
+		struct execlist_ctx_descriptor_format ctx[2])
+{
+	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
+	struct intel_vgpu_execlist_slot *slot =
+		get_next_execlist_slot(execlist);
+
+	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
+	struct execlist_context_status_format status;
+	struct intel_vgpu *vgpu = execlist->vgpu;
+
+	gvt_dbg_el("emulate schedule-in\n");
+
+	if (!slot) {
+		gvt_vgpu_err("no available execlist slot\n");
+		return -EINVAL;
+	}
+
+	memset(&status, 0, sizeof(status));
+	memset(slot->ctx, 0, sizeof(slot->ctx));
+
+	slot->ctx[0] = ctx[0];
+	slot->ctx[1] = ctx[1];
+
+	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
+			slot->index, ctx[0].context_id,
+			ctx[1].context_id);
+
+	/*
+	 * no running execlist, make this write bundle as running execlist
+	 * -> idle-to-active
+	 */
+	if (!running) {
+		gvt_dbg_el("no current running execlist\n");
+
+		execlist->running_slot = slot;
+		execlist->pending_slot = NULL;
+		execlist->running_context = &slot->ctx[0];
+
+		gvt_dbg_el("running slot index %d running context %x\n",
+				execlist->running_slot->index,
+				execlist->running_context->context_id);
+
+		emulate_execlist_status(execlist);
+
+		status.idle_to_active = 1;
+		status.context_id = 0;
+
+		emulate_csb_update(execlist, &status, false);
+		return 0;
+	}
+
+	ctx0 = &running->ctx[0];
+	ctx1 = &running->ctx[1];
+
+	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
+		running->index, ctx0->context_id, ctx1->context_id);
+
+	/*
+	 * already has an running execlist
+	 *	a. running ctx1 is valid,
+	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
+	 *	b. running ctx1 is not valid,
+	 *	   ctx0 == new execlist ctx[0]
+	 * ----> lite-restore + preempted
+	 */
+	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
+		/* condition a */
+		(!same_context(ctx0, execlist->running_context))) ||
+			(!valid_context(ctx1) &&
+			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
+		gvt_dbg_el("need to switch virtual execlist slot\n");
+
+		execlist->pending_slot = slot;
+		switch_virtual_execlist_slot(execlist);
+
+		emulate_execlist_status(execlist);
+
+		status.lite_restore = status.preempted = 1;
+		status.context_id = ctx[0].context_id;
+
+		emulate_csb_update(execlist, &status, false);
+	} else {
+		gvt_dbg_el("emulate as pending slot\n");
+		/*
+		 * otherwise
+		 * --> emulate pending execlist exist + but no preemption case
+		 */
+		execlist->pending_slot = slot;
+		emulate_execlist_status(execlist);
+	}
+	return 0;
+}
+
+#define get_desc_from_elsp_dwords(ed, i) \
+	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))
+
+static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct execlist_ctx_descriptor_format ctx[2];
+	int ret;
+
+	if (!workload->emulate_schedule_in)
+		return 0;
+
+	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
+	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);
+
+	ret = emulate_execlist_schedule_in(&s->execlist[workload->engine->id],
+					   ctx);
+	if (ret) {
+		gvt_vgpu_err("fail to emulate execlist schedule in\n");
+		return ret;
+	}
+	return 0;
+}
+
+static int complete_execlist_workload(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_vgpu_execlist *execlist =
+		&s->execlist[workload->engine->id];
+	struct intel_vgpu_workload *next_workload;
+	struct list_head *next = workload_q_head(vgpu, workload->engine)->next;
+	bool lite_restore = false;
+	int ret = 0;
+
+	gvt_dbg_el("complete workload %p status %d\n",
+		   workload, workload->status);
+
+	if (workload->status || vgpu->resetting_eng & workload->engine->mask)
+		goto out;
+
+	if (!list_empty(workload_q_head(vgpu, workload->engine))) {
+		struct execlist_ctx_descriptor_format *this_desc, *next_desc;
+
+		next_workload = container_of(next,
+				struct intel_vgpu_workload, list);
+		this_desc = &workload->ctx_desc;
+		next_desc = &next_workload->ctx_desc;
+
+		lite_restore = same_context(this_desc, next_desc);
+	}
+
+	if (lite_restore) {
+		gvt_dbg_el("next context == current - no schedule-out\n");
+		goto out;
+	}
+
+	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
+out:
+	return ret;
+}
+
+static int submit_context(struct intel_vgpu *vgpu,
+			  const struct intel_engine_cs *engine,
+			  struct execlist_ctx_descriptor_format *desc,
+			  bool emulate_schedule_in)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_vgpu_workload *workload = NULL;
+
+	workload = intel_vgpu_create_workload(vgpu, engine, desc);
+	if (IS_ERR(workload))
+		return PTR_ERR(workload);
+
+	workload->prepare = prepare_execlist_workload;
+	workload->complete = complete_execlist_workload;
+	workload->emulate_schedule_in = emulate_schedule_in;
+
+	if (emulate_schedule_in)
+		workload->elsp_dwords = s->execlist[engine->id].elsp_dwords;
+
+	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
+		   emulate_schedule_in);
+
+	intel_vgpu_queue_workload(workload);
+	return 0;
+}
+
+int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu,
+			       const struct intel_engine_cs *engine)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_vgpu_execlist *execlist = &s->execlist[engine->id];
+	struct execlist_ctx_descriptor_format *desc[2];
+	int i, ret;
+
+	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
+	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);
+
+	if (!desc[0]->valid) {
+		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
+		goto inv_desc;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(desc); i++) {
+		if (!desc[i]->valid)
+			continue;
+		if (!desc[i]->privilege_access) {
+			gvt_vgpu_err("unexpected GGTT elsp submission\n");
+			goto inv_desc;
+		}
+	}
+
+	/* submit workload */
+	for (i = 0; i < ARRAY_SIZE(desc); i++) {
+		if (!desc[i]->valid)
+			continue;
+		ret = submit_context(vgpu, engine, desc[i], i == 0);
+		if (ret) {
+			gvt_vgpu_err("failed to submit desc %d\n", i);
+			return ret;
+		}
+	}
+
+	return 0;
+
+inv_desc:
+	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
+		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
+	return -EINVAL;
+}
+
+static void init_vgpu_execlist(struct intel_vgpu *vgpu,
+			       const struct intel_engine_cs *engine)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_vgpu_execlist *execlist = &s->execlist[engine->id];
+	struct execlist_context_status_pointer_format ctx_status_ptr;
+	u32 ctx_status_ptr_reg;
+
+	memset(execlist, 0, sizeof(*execlist));
+
+	execlist->vgpu = vgpu;
+	execlist->engine = engine;
+	execlist->slot[0].index = 0;
+	execlist->slot[1].index = 1;
+
+	ctx_status_ptr_reg = execlist_ring_mmio(engine, _EL_OFFSET_STATUS_PTR);
+	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
+	ctx_status_ptr.read_ptr = 0;
+	ctx_status_ptr.write_ptr = 0x7;
+	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
+}
+
+static void clean_execlist(struct intel_vgpu *vgpu,
+			   intel_engine_mask_t engine_mask)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+
+	for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) {
+		kfree(s->ring_scan_buffer[engine->id]);
+		s->ring_scan_buffer[engine->id] = NULL;
+		s->ring_scan_buffer_size[engine->id] = 0;
+	}
+}
+
+static void reset_execlist(struct intel_vgpu *vgpu,
+			   intel_engine_mask_t engine_mask)
+{
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp;
+
+	for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp)
+		init_vgpu_execlist(vgpu, engine);
+}
+
+static int init_execlist(struct intel_vgpu *vgpu,
+			 intel_engine_mask_t engine_mask)
+{
+	reset_execlist(vgpu, engine_mask);
+	return 0;
+}
+
+const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
+	.name = "execlist",
+	.init = init_execlist,
+	.reset = reset_execlist,
+	.clean = clean_execlist,
+};
diff --git a/drivers/gpu/drm/i915/gvt/execlist.h b/drivers/gpu/drm/i915/gvt/execlist.h
new file mode 100644
index 0000000000..84ad74b37d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/execlist.h
@@ -0,0 +1,185 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *
+ */
+
+#ifndef _GVT_EXECLIST_H_
+#define _GVT_EXECLIST_H_
+
+#include <linux/types.h>
+
+struct execlist_ctx_descriptor_format {
+	union {
+		u32 ldw;
+		struct {
+			u32 valid                  : 1;
+			u32 force_pd_restore       : 1;
+			u32 force_restore          : 1;
+			u32 addressing_mode        : 2;
+			u32 llc_coherency          : 1;
+			u32 fault_handling         : 2;
+			u32 privilege_access       : 1;
+			u32 reserved               : 3;
+			u32 lrca                   : 20;
+		};
+	};
+	union {
+		u32 udw;
+		u32 context_id;
+	};
+};
+
+struct execlist_status_format {
+	union {
+		u32 ldw;
+		struct {
+			u32 current_execlist_pointer       :1;
+			u32 execlist_write_pointer         :1;
+			u32 execlist_queue_full            :1;
+			u32 execlist_1_valid               :1;
+			u32 execlist_0_valid               :1;
+			u32 last_ctx_switch_reason         :9;
+			u32 current_active_elm_status      :2;
+			u32 arbitration_enable             :1;
+			u32 execlist_1_active              :1;
+			u32 execlist_0_active              :1;
+			u32 reserved                       :13;
+		};
+	};
+	union {
+		u32 udw;
+		u32 context_id;
+	};
+};
+
+struct execlist_context_status_pointer_format {
+	union {
+		u32 dw;
+		struct {
+			u32 write_ptr              :3;
+			u32 reserved               :5;
+			u32 read_ptr               :3;
+			u32 reserved2              :5;
+			u32 mask                   :16;
+		};
+	};
+};
+
+struct execlist_context_status_format {
+	union {
+		u32 ldw;
+		struct {
+			u32 idle_to_active         :1;
+			u32 preempted              :1;
+			u32 element_switch         :1;
+			u32 active_to_idle         :1;
+			u32 context_complete       :1;
+			u32 wait_on_sync_flip      :1;
+			u32 wait_on_vblank         :1;
+			u32 wait_on_semaphore      :1;
+			u32 wait_on_scanline       :1;
+			u32 reserved               :2;
+			u32 semaphore_wait_mode    :1;
+			u32 display_plane          :3;
+			u32 lite_restore           :1;
+			u32 reserved_2             :16;
+		};
+	};
+	union {
+		u32 udw;
+		u32 context_id;
+	};
+};
+
+struct execlist_mmio_pair {
+	u32 addr;
+	u32 val;
+};
+
+/* The first 52 dwords in register state context */
+struct execlist_ring_context {
+	u32 nop1;
+	u32 lri_cmd_1;
+	struct execlist_mmio_pair ctx_ctrl;
+	struct execlist_mmio_pair ring_header;
+	struct execlist_mmio_pair ring_tail;
+	struct execlist_mmio_pair rb_start;
+	struct execlist_mmio_pair rb_ctrl;
+	struct execlist_mmio_pair bb_cur_head_UDW;
+	struct execlist_mmio_pair bb_cur_head_LDW;
+	struct execlist_mmio_pair bb_state;
+	struct execlist_mmio_pair second_bb_addr_UDW;
+	struct execlist_mmio_pair second_bb_addr_LDW;
+	struct execlist_mmio_pair second_bb_state;
+	struct execlist_mmio_pair bb_per_ctx_ptr;
+	struct execlist_mmio_pair rcs_indirect_ctx;
+	struct execlist_mmio_pair rcs_indirect_ctx_offset;
+	u32 nop2;
+	u32 nop3;
+	u32 nop4;
+	u32 lri_cmd_2;
+	struct execlist_mmio_pair ctx_timestamp;
+	/*
+	 * pdps[8]={ pdp3_UDW, pdp3_LDW, pdp2_UDW, pdp2_LDW,
+	 *           pdp1_UDW, pdp1_LDW, pdp0_UDW, pdp0_LDW}
+	 */
+	struct execlist_mmio_pair pdps[8];
+};
+
+struct intel_vgpu_elsp_dwords {
+	u32 data[4];
+	u32 index;
+};
+
+struct intel_vgpu_execlist_slot {
+	struct execlist_ctx_descriptor_format ctx[2];
+	u32 index;
+};
+
+struct intel_vgpu_execlist {
+	struct intel_vgpu_execlist_slot slot[2];
+	struct intel_vgpu_execlist_slot *running_slot;
+	struct intel_vgpu_execlist_slot *pending_slot;
+	struct execlist_ctx_descriptor_format *running_context;
+	struct intel_vgpu *vgpu;
+	struct intel_vgpu_elsp_dwords elsp_dwords;
+	const struct intel_engine_cs *engine;
+};
+
+void intel_vgpu_clean_execlist(struct intel_vgpu *vgpu);
+
+int intel_vgpu_init_execlist(struct intel_vgpu *vgpu);
+
+int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu,
+			       const struct intel_engine_cs *engine);
+
+#endif /*_GVT_EXECLIST_H_*/
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c
new file mode 100644
index 0000000000..835c3fde8a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c
@@ -0,0 +1,508 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *
+ * Contributors:
+ *    Bing Niu <bing.niu@intel.com>
+ *    Xu Han <xu.han@intel.com>
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Xiaoguang Chen <xiaoguang.chen@intel.com>
+ *    Yang Liu <yang2.liu@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *
+ */
+
+#include <uapi/drm/drm_fourcc.h>
+#include "i915_drv.h"
+#include "gvt.h"
+#include "i915_pvinfo.h"
+#include "i915_reg.h"
+
+#define PRIMARY_FORMAT_NUM	16
+struct pixel_format {
+	int drm_format;	/* Pixel format in DRM definition */
+	int bpp; /* Bits per pixel, 0 indicates invalid */
+	const char *desc; /* The description */
+};
+
+static const struct pixel_format bdw_pixel_formats[] = {
+	{DRM_FORMAT_C8, 8, "8-bit Indexed"},
+	{DRM_FORMAT_RGB565, 16, "16-bit BGRX (5:6:5 MSB-R:G:B)"},
+	{DRM_FORMAT_XRGB8888, 32, "32-bit BGRX (8:8:8:8 MSB-X:R:G:B)"},
+	{DRM_FORMAT_XBGR2101010, 32, "32-bit RGBX (2:10:10:10 MSB-X:B:G:R)"},
+
+	{DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"},
+	{DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"},
+
+	/* non-supported format has bpp default to 0 */
+	{0, 0, NULL},
+};
+
+static const struct pixel_format skl_pixel_formats[] = {
+	{DRM_FORMAT_YUYV, 16, "16-bit packed YUYV (8:8:8:8 MSB-V:Y2:U:Y1)"},
+	{DRM_FORMAT_UYVY, 16, "16-bit packed UYVY (8:8:8:8 MSB-Y2:V:Y1:U)"},
+	{DRM_FORMAT_YVYU, 16, "16-bit packed YVYU (8:8:8:8 MSB-U:Y2:V:Y1)"},
+	{DRM_FORMAT_VYUY, 16, "16-bit packed VYUY (8:8:8:8 MSB-Y2:U:Y1:V)"},
+
+	{DRM_FORMAT_C8, 8, "8-bit Indexed"},
+	{DRM_FORMAT_RGB565, 16, "16-bit BGRX (5:6:5 MSB-R:G:B)"},
+	{DRM_FORMAT_ABGR8888, 32, "32-bit RGBA (8:8:8:8 MSB-A:B:G:R)"},
+	{DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"},
+
+	{DRM_FORMAT_ARGB8888, 32, "32-bit BGRA (8:8:8:8 MSB-A:R:G:B)"},
+	{DRM_FORMAT_XRGB8888, 32, "32-bit BGRX (8:8:8:8 MSB-X:R:G:B)"},
+	{DRM_FORMAT_XBGR2101010, 32, "32-bit RGBX (2:10:10:10 MSB-X:B:G:R)"},
+	{DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"},
+
+	/* non-supported format has bpp default to 0 */
+	{0, 0, NULL},
+};
+
+static int bdw_format_to_drm(int format)
+{
+	int bdw_pixel_formats_index = 6;
+
+	switch (format) {
+	case DISP_FORMAT_8BPP:
+		bdw_pixel_formats_index = 0;
+		break;
+	case DISP_FORMAT_BGRX565:
+		bdw_pixel_formats_index = 1;
+		break;
+	case DISP_FORMAT_BGRX888:
+		bdw_pixel_formats_index = 2;
+		break;
+	case DISP_FORMAT_RGBX101010:
+		bdw_pixel_formats_index = 3;
+		break;
+	case DISP_FORMAT_BGRX101010:
+		bdw_pixel_formats_index = 4;
+		break;
+	case DISP_FORMAT_RGBX888:
+		bdw_pixel_formats_index = 5;
+		break;
+
+	default:
+		break;
+	}
+
+	return bdw_pixel_formats_index;
+}
+
+static int skl_format_to_drm(int format, bool rgb_order, bool alpha,
+	int yuv_order)
+{
+	int skl_pixel_formats_index = 12;
+
+	switch (format) {
+	case PLANE_CTL_FORMAT_INDEXED:
+		skl_pixel_formats_index = 4;
+		break;
+	case PLANE_CTL_FORMAT_RGB_565:
+		skl_pixel_formats_index = 5;
+		break;
+	case PLANE_CTL_FORMAT_XRGB_8888:
+		if (rgb_order)
+			skl_pixel_formats_index = alpha ? 6 : 7;
+		else
+			skl_pixel_formats_index = alpha ? 8 : 9;
+		break;
+	case PLANE_CTL_FORMAT_XRGB_2101010:
+		skl_pixel_formats_index = rgb_order ? 10 : 11;
+		break;
+	case PLANE_CTL_FORMAT_YUV422:
+		skl_pixel_formats_index = yuv_order >> 16;
+		if (skl_pixel_formats_index > 3)
+			return -EINVAL;
+		break;
+
+	default:
+		break;
+	}
+
+	return skl_pixel_formats_index;
+}
+
+static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe,
+	u32 tiled, int stride_mask, int bpp)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+
+	u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(pipe)) & stride_mask;
+	u32 stride = stride_reg;
+
+	if (GRAPHICS_VER(dev_priv) >= 9) {
+		switch (tiled) {
+		case PLANE_CTL_TILED_LINEAR:
+			stride = stride_reg * 64;
+			break;
+		case PLANE_CTL_TILED_X:
+			stride = stride_reg * 512;
+			break;
+		case PLANE_CTL_TILED_Y:
+			stride = stride_reg * 128;
+			break;
+		case PLANE_CTL_TILED_YF:
+			if (bpp == 8)
+				stride = stride_reg * 64;
+			else if (bpp == 16 || bpp == 32 || bpp == 64)
+				stride = stride_reg * 128;
+			else
+				gvt_dbg_core("skl: unsupported bpp:%d\n", bpp);
+			break;
+		default:
+			gvt_dbg_core("skl: unsupported tile format:%x\n",
+				tiled);
+		}
+	}
+
+	return stride;
+}
+
+static int get_active_pipe(struct intel_vgpu *vgpu)
+{
+	int i;
+
+	for (i = 0; i < I915_MAX_PIPES; i++)
+		if (pipe_is_enabled(vgpu, i))
+			break;
+
+	return i;
+}
+
+/**
+ * intel_vgpu_decode_primary_plane - Decode primary plane
+ * @vgpu: input vgpu
+ * @plane: primary plane to save decoded info
+ * This function is called for decoding plane
+ *
+ * Returns:
+ * 0 on success, non-zero if failed.
+ */
+int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
+	struct intel_vgpu_primary_plane_format *plane)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	u32 val, fmt;
+	int pipe;
+
+	pipe = get_active_pipe(vgpu);
+	if (pipe >= I915_MAX_PIPES)
+		return -ENODEV;
+
+	val = vgpu_vreg_t(vgpu, DSPCNTR(pipe));
+	plane->enabled = !!(val & DISP_ENABLE);
+	if (!plane->enabled)
+		return -ENODEV;
+
+	if (GRAPHICS_VER(dev_priv) >= 9) {
+		plane->tiled = val & PLANE_CTL_TILED_MASK;
+		fmt = skl_format_to_drm(
+			val & PLANE_CTL_FORMAT_MASK_SKL,
+			val & PLANE_CTL_ORDER_RGBX,
+			val & PLANE_CTL_ALPHA_MASK,
+			val & PLANE_CTL_YUV422_ORDER_MASK);
+
+		if (fmt >= ARRAY_SIZE(skl_pixel_formats)) {
+			gvt_vgpu_err("Out-of-bounds pixel format index\n");
+			return -EINVAL;
+		}
+
+		plane->bpp = skl_pixel_formats[fmt].bpp;
+		plane->drm_format = skl_pixel_formats[fmt].drm_format;
+	} else {
+		plane->tiled = val & DISP_TILED;
+		fmt = bdw_format_to_drm(val & DISP_FORMAT_MASK);
+		plane->bpp = bdw_pixel_formats[fmt].bpp;
+		plane->drm_format = bdw_pixel_formats[fmt].drm_format;
+	}
+
+	if (!plane->bpp) {
+		gvt_vgpu_err("Non-supported pixel format (0x%x)\n", fmt);
+		return -EINVAL;
+	}
+
+	plane->hw_format = fmt;
+
+	plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK;
+	if (!vgpu_gmadr_is_valid(vgpu, plane->base))
+		return  -EINVAL;
+
+	plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base);
+	if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) {
+		gvt_vgpu_err("Translate primary plane gma 0x%x to gpa fail\n",
+				plane->base);
+		return  -EINVAL;
+	}
+
+	plane->stride = intel_vgpu_get_stride(vgpu, pipe, plane->tiled,
+		(GRAPHICS_VER(dev_priv) >= 9) ?
+		(_PRI_PLANE_STRIDE_MASK >> 6) :
+		_PRI_PLANE_STRIDE_MASK, plane->bpp);
+
+	plane->width = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >>
+		_PIPE_H_SRCSZ_SHIFT;
+	plane->width += 1;
+	plane->height = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) &
+			_PIPE_V_SRCSZ_MASK) >> _PIPE_V_SRCSZ_SHIFT;
+	plane->height += 1;	/* raw height is one minus the real value */
+
+	val = vgpu_vreg_t(vgpu, DSPTILEOFF(pipe));
+	plane->x_offset = (val & _PRI_PLANE_X_OFF_MASK) >>
+		_PRI_PLANE_X_OFF_SHIFT;
+	plane->y_offset = (val & _PRI_PLANE_Y_OFF_MASK) >>
+		_PRI_PLANE_Y_OFF_SHIFT;
+
+	return 0;
+}
+
+#define CURSOR_FORMAT_NUM	(1 << 6)
+struct cursor_mode_format {
+	int drm_format;	/* Pixel format in DRM definition */
+	u8 bpp; /* Bits per pixel; 0 indicates invalid */
+	u32 width; /* In pixel */
+	u32 height; /* In lines */
+	const char *desc; /* The description */
+};
+
+static const struct cursor_mode_format cursor_pixel_formats[] = {
+	{DRM_FORMAT_ARGB8888, 32, 128, 128, "128x128 32bpp ARGB"},
+	{DRM_FORMAT_ARGB8888, 32, 256, 256, "256x256 32bpp ARGB"},
+	{DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"},
+	{DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"},
+
+	/* non-supported format has bpp default to 0 */
+	{0, 0, 0, 0, NULL},
+};
+
+static int cursor_mode_to_drm(int mode)
+{
+	int cursor_pixel_formats_index = 4;
+
+	switch (mode) {
+	case MCURSOR_MODE_128_ARGB_AX:
+		cursor_pixel_formats_index = 0;
+		break;
+	case MCURSOR_MODE_256_ARGB_AX:
+		cursor_pixel_formats_index = 1;
+		break;
+	case MCURSOR_MODE_64_ARGB_AX:
+		cursor_pixel_formats_index = 2;
+		break;
+	case MCURSOR_MODE_64_32B_AX:
+		cursor_pixel_formats_index = 3;
+		break;
+
+	default:
+		break;
+	}
+
+	return cursor_pixel_formats_index;
+}
+
+/**
+ * intel_vgpu_decode_cursor_plane - Decode sprite plane
+ * @vgpu: input vgpu
+ * @plane: cursor plane to save decoded info
+ * This function is called for decoding plane
+ *
+ * Returns:
+ * 0 on success, non-zero if failed.
+ */
+int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu,
+	struct intel_vgpu_cursor_plane_format *plane)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	u32 val, mode, index;
+	u32 alpha_plane, alpha_force;
+	int pipe;
+
+	pipe = get_active_pipe(vgpu);
+	if (pipe >= I915_MAX_PIPES)
+		return -ENODEV;
+
+	val = vgpu_vreg_t(vgpu, CURCNTR(pipe));
+	mode = val & MCURSOR_MODE_MASK;
+	plane->enabled = (mode != MCURSOR_MODE_DISABLE);
+	if (!plane->enabled)
+		return -ENODEV;
+
+	index = cursor_mode_to_drm(mode);
+
+	if (!cursor_pixel_formats[index].bpp) {
+		gvt_vgpu_err("Non-supported cursor mode (0x%x)\n", mode);
+		return -EINVAL;
+	}
+	plane->mode = mode;
+	plane->bpp = cursor_pixel_formats[index].bpp;
+	plane->drm_format = cursor_pixel_formats[index].drm_format;
+	plane->width = cursor_pixel_formats[index].width;
+	plane->height = cursor_pixel_formats[index].height;
+
+	alpha_plane = (val & _CURSOR_ALPHA_PLANE_MASK) >>
+				_CURSOR_ALPHA_PLANE_SHIFT;
+	alpha_force = (val & _CURSOR_ALPHA_FORCE_MASK) >>
+				_CURSOR_ALPHA_FORCE_SHIFT;
+	if (alpha_plane || alpha_force)
+		gvt_dbg_core("alpha_plane=0x%x, alpha_force=0x%x\n",
+			alpha_plane, alpha_force);
+
+	plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK;
+	if (!vgpu_gmadr_is_valid(vgpu, plane->base))
+		return  -EINVAL;
+
+	plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base);
+	if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) {
+		gvt_vgpu_err("Translate cursor plane gma 0x%x to gpa fail\n",
+				plane->base);
+		return  -EINVAL;
+	}
+
+	val = vgpu_vreg_t(vgpu, CURPOS(pipe));
+	plane->x_pos = (val & _CURSOR_POS_X_MASK) >> _CURSOR_POS_X_SHIFT;
+	plane->x_sign = (val & _CURSOR_SIGN_X_MASK) >> _CURSOR_SIGN_X_SHIFT;
+	plane->y_pos = (val & _CURSOR_POS_Y_MASK) >> _CURSOR_POS_Y_SHIFT;
+	plane->y_sign = (val & _CURSOR_SIGN_Y_MASK) >> _CURSOR_SIGN_Y_SHIFT;
+
+	plane->x_hot = vgpu_vreg_t(vgpu, vgtif_reg(cursor_x_hot));
+	plane->y_hot = vgpu_vreg_t(vgpu, vgtif_reg(cursor_y_hot));
+	return 0;
+}
+
+#define SPRITE_FORMAT_NUM	(1 << 3)
+
+static const struct pixel_format sprite_pixel_formats[SPRITE_FORMAT_NUM] = {
+	[0x0] = {DRM_FORMAT_YUV422, 16, "YUV 16-bit 4:2:2 packed"},
+	[0x1] = {DRM_FORMAT_XRGB2101010, 32, "RGB 32-bit 2:10:10:10"},
+	[0x2] = {DRM_FORMAT_XRGB8888, 32, "RGB 32-bit 8:8:8:8"},
+	[0x4] = {DRM_FORMAT_AYUV, 32,
+		"YUV 32-bit 4:4:4 packed (8:8:8:8 MSB-X:Y:U:V)"},
+};
+
+/**
+ * intel_vgpu_decode_sprite_plane - Decode sprite plane
+ * @vgpu: input vgpu
+ * @plane: sprite plane to save decoded info
+ * This function is called for decoding plane
+ *
+ * Returns:
+ * 0 on success, non-zero if failed.
+ */
+int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu,
+	struct intel_vgpu_sprite_plane_format *plane)
+{
+	u32 val, fmt;
+	u32 color_order, yuv_order;
+	int drm_format;
+	int pipe;
+
+	pipe = get_active_pipe(vgpu);
+	if (pipe >= I915_MAX_PIPES)
+		return -ENODEV;
+
+	val = vgpu_vreg_t(vgpu, SPRCTL(pipe));
+	plane->enabled = !!(val & SPRITE_ENABLE);
+	if (!plane->enabled)
+		return -ENODEV;
+
+	plane->tiled = !!(val & SPRITE_TILED);
+	color_order = !!(val & SPRITE_RGB_ORDER_RGBX);
+	yuv_order = (val & SPRITE_YUV_ORDER_MASK) >>
+				_SPRITE_YUV_ORDER_SHIFT;
+
+	fmt = (val & SPRITE_FORMAT_MASK) >> _SPRITE_FMT_SHIFT;
+	if (!sprite_pixel_formats[fmt].bpp) {
+		gvt_vgpu_err("Non-supported pixel format (0x%x)\n", fmt);
+		return -EINVAL;
+	}
+	plane->hw_format = fmt;
+	plane->bpp = sprite_pixel_formats[fmt].bpp;
+	drm_format = sprite_pixel_formats[fmt].drm_format;
+
+	/* Order of RGB values in an RGBxxx buffer may be ordered RGB or
+	 * BGR depending on the state of the color_order field
+	 */
+	if (!color_order) {
+		if (drm_format == DRM_FORMAT_XRGB2101010)
+			drm_format = DRM_FORMAT_XBGR2101010;
+		else if (drm_format == DRM_FORMAT_XRGB8888)
+			drm_format = DRM_FORMAT_XBGR8888;
+	}
+
+	if (drm_format == DRM_FORMAT_YUV422) {
+		switch (yuv_order) {
+		case 0:
+			drm_format = DRM_FORMAT_YUYV;
+			break;
+		case 1:
+			drm_format = DRM_FORMAT_UYVY;
+			break;
+		case 2:
+			drm_format = DRM_FORMAT_YVYU;
+			break;
+		case 3:
+			drm_format = DRM_FORMAT_VYUY;
+			break;
+		default:
+			/* yuv_order has only 2 bits */
+			break;
+		}
+	}
+
+	plane->drm_format = drm_format;
+
+	plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK;
+	if (!vgpu_gmadr_is_valid(vgpu, plane->base))
+		return  -EINVAL;
+
+	plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base);
+	if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) {
+		gvt_vgpu_err("Translate sprite plane gma 0x%x to gpa fail\n",
+				plane->base);
+		return  -EINVAL;
+	}
+
+	plane->stride = vgpu_vreg_t(vgpu, SPRSTRIDE(pipe)) &
+				_SPRITE_STRIDE_MASK;
+
+	val = vgpu_vreg_t(vgpu, SPRSIZE(pipe));
+	plane->height = (val & _SPRITE_SIZE_HEIGHT_MASK) >>
+		_SPRITE_SIZE_HEIGHT_SHIFT;
+	plane->width = (val & _SPRITE_SIZE_WIDTH_MASK) >>
+		_SPRITE_SIZE_WIDTH_SHIFT;
+	plane->height += 1;	/* raw height is one minus the real value */
+	plane->width += 1;	/* raw width is one minus the real value */
+
+	val = vgpu_vreg_t(vgpu, SPRPOS(pipe));
+	plane->x_pos = (val & _SPRITE_POS_X_MASK) >> _SPRITE_POS_X_SHIFT;
+	plane->y_pos = (val & _SPRITE_POS_Y_MASK) >> _SPRITE_POS_Y_SHIFT;
+
+	val = vgpu_vreg_t(vgpu, SPROFFSET(pipe));
+	plane->x_offset = (val & _SPRITE_OFFSET_START_X_MASK) >>
+			   _SPRITE_OFFSET_START_X_SHIFT;
+	plane->y_offset = (val & _SPRITE_OFFSET_START_Y_MASK) >>
+			   _SPRITE_OFFSET_START_Y_SHIFT;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.h b/drivers/gpu/drm/i915/gvt/fb_decoder.h
new file mode 100644
index 0000000000..4eff441944
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.h
@@ -0,0 +1,173 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *
+ * Contributors:
+ *    Bing Niu <bing.niu@intel.com>
+ *    Xu Han <xu.han@intel.com>
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Xiaoguang Chen <xiaoguang.chen@intel.com>
+ *    Yang Liu <yang2.liu@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *
+ */
+
+#ifndef _GVT_FB_DECODER_H_
+#define _GVT_FB_DECODER_H_
+
+#include <linux/types.h>
+
+#include "display/intel_display_limits.h"
+
+struct intel_vgpu;
+
+#define _PLANE_CTL_FORMAT_SHIFT		24
+#define _PLANE_CTL_TILED_SHIFT		10
+#define _PIPE_V_SRCSZ_SHIFT		0
+#define _PIPE_V_SRCSZ_MASK		(0xfff << _PIPE_V_SRCSZ_SHIFT)
+#define _PIPE_H_SRCSZ_SHIFT		16
+#define _PIPE_H_SRCSZ_MASK		(0x1fff << _PIPE_H_SRCSZ_SHIFT)
+
+#define _PRI_PLANE_FMT_SHIFT		26
+#define _PRI_PLANE_STRIDE_MASK		(0x3ff << 6)
+#define _PRI_PLANE_X_OFF_SHIFT		0
+#define _PRI_PLANE_X_OFF_MASK		(0x1fff << _PRI_PLANE_X_OFF_SHIFT)
+#define _PRI_PLANE_Y_OFF_SHIFT		16
+#define _PRI_PLANE_Y_OFF_MASK		(0xfff << _PRI_PLANE_Y_OFF_SHIFT)
+
+#define _CURSOR_MODE			0x3f
+#define _CURSOR_ALPHA_FORCE_SHIFT	8
+#define _CURSOR_ALPHA_FORCE_MASK	(0x3 << _CURSOR_ALPHA_FORCE_SHIFT)
+#define _CURSOR_ALPHA_PLANE_SHIFT	10
+#define _CURSOR_ALPHA_PLANE_MASK	(0x3 << _CURSOR_ALPHA_PLANE_SHIFT)
+#define _CURSOR_POS_X_SHIFT		0
+#define _CURSOR_POS_X_MASK		(0x1fff << _CURSOR_POS_X_SHIFT)
+#define _CURSOR_SIGN_X_SHIFT		15
+#define _CURSOR_SIGN_X_MASK		(1 << _CURSOR_SIGN_X_SHIFT)
+#define _CURSOR_POS_Y_SHIFT		16
+#define _CURSOR_POS_Y_MASK		(0xfff << _CURSOR_POS_Y_SHIFT)
+#define _CURSOR_SIGN_Y_SHIFT		31
+#define _CURSOR_SIGN_Y_MASK		(1 << _CURSOR_SIGN_Y_SHIFT)
+
+#define _SPRITE_FMT_SHIFT		25
+#define _SPRITE_COLOR_ORDER_SHIFT	20
+#define _SPRITE_YUV_ORDER_SHIFT		16
+#define _SPRITE_STRIDE_SHIFT		6
+#define _SPRITE_STRIDE_MASK		(0x1ff << _SPRITE_STRIDE_SHIFT)
+#define _SPRITE_SIZE_WIDTH_SHIFT	0
+#define _SPRITE_SIZE_HEIGHT_SHIFT	16
+#define _SPRITE_SIZE_WIDTH_MASK		(0x1fff << _SPRITE_SIZE_WIDTH_SHIFT)
+#define _SPRITE_SIZE_HEIGHT_MASK	(0xfff << _SPRITE_SIZE_HEIGHT_SHIFT)
+#define _SPRITE_POS_X_SHIFT		0
+#define _SPRITE_POS_Y_SHIFT		16
+#define _SPRITE_POS_X_MASK		(0x1fff << _SPRITE_POS_X_SHIFT)
+#define _SPRITE_POS_Y_MASK		(0xfff << _SPRITE_POS_Y_SHIFT)
+#define _SPRITE_OFFSET_START_X_SHIFT	0
+#define _SPRITE_OFFSET_START_Y_SHIFT	16
+#define _SPRITE_OFFSET_START_X_MASK	(0x1fff << _SPRITE_OFFSET_START_X_SHIFT)
+#define _SPRITE_OFFSET_START_Y_MASK	(0xfff << _SPRITE_OFFSET_START_Y_SHIFT)
+
+enum GVT_FB_EVENT {
+	FB_MODE_SET_START = 1,
+	FB_MODE_SET_END,
+	FB_DISPLAY_FLIP,
+};
+
+enum DDI_PORT {
+	DDI_PORT_NONE	= 0,
+	DDI_PORT_B	= 1,
+	DDI_PORT_C	= 2,
+	DDI_PORT_D	= 3,
+	DDI_PORT_E	= 4
+};
+
+/* color space conversion and gamma correction are not included */
+struct intel_vgpu_primary_plane_format {
+	u8	enabled;	/* plane is enabled */
+	u32	tiled;		/* tiling mode: linear, X-tiled, Y tiled, etc */
+	u8	bpp;		/* bits per pixel */
+	u32	hw_format;	/* format field in the PRI_CTL register */
+	u32	drm_format;	/* format in DRM definition */
+	u32	base;		/* framebuffer base in graphics memory */
+	u64     base_gpa;
+	u32	x_offset;	/* in pixels */
+	u32	y_offset;	/* in lines */
+	u32	width;		/* in pixels */
+	u32	height;		/* in lines */
+	u32	stride;		/* in bytes */
+};
+
+struct intel_vgpu_sprite_plane_format {
+	u8	enabled;	/* plane is enabled */
+	u8	tiled;		/* X-tiled */
+	u8	bpp;		/* bits per pixel */
+	u32	hw_format;	/* format field in the SPR_CTL register */
+	u32	drm_format;	/* format in DRM definition */
+	u32	base;		/* sprite base in graphics memory */
+	u64     base_gpa;
+	u32	x_pos;		/* in pixels */
+	u32	y_pos;		/* in lines */
+	u32	x_offset;	/* in pixels */
+	u32	y_offset;	/* in lines */
+	u32	width;		/* in pixels */
+	u32	height;		/* in lines */
+	u32	stride;		/* in bytes */
+};
+
+struct intel_vgpu_cursor_plane_format {
+	u8	enabled;
+	u8	mode;		/* cursor mode select */
+	u8	bpp;		/* bits per pixel */
+	u32	drm_format;	/* format in DRM definition */
+	u32	base;		/* cursor base in graphics memory */
+	u64     base_gpa;
+	u32	x_pos;		/* in pixels */
+	u32	y_pos;		/* in lines */
+	u8	x_sign;		/* X Position Sign */
+	u8	y_sign;		/* Y Position Sign */
+	u32	width;		/* in pixels */
+	u32	height;		/* in lines */
+	u32	x_hot;		/* in pixels */
+	u32	y_hot;		/* in pixels */
+};
+
+struct intel_vgpu_pipe_format {
+	struct intel_vgpu_primary_plane_format	primary;
+	struct intel_vgpu_sprite_plane_format	sprite;
+	struct intel_vgpu_cursor_plane_format	cursor;
+	enum DDI_PORT ddi_port;  /* the DDI port that pipe is connected to */
+};
+
+struct intel_vgpu_fb_format {
+	struct intel_vgpu_pipe_format	pipes[I915_MAX_PIPES];
+};
+
+int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
+	struct intel_vgpu_primary_plane_format *plane);
+int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu,
+	struct intel_vgpu_cursor_plane_format *plane);
+int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu,
+	struct intel_vgpu_sprite_plane_format *plane);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
new file mode 100644
index 0000000000..4dd52ac204
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ * Contributors:
+ *    Changbin Du <changbin.du@intel.com>
+ *
+ */
+
+#include <linux/firmware.h>
+#include <linux/crc32.h>
+
+#include "i915_drv.h"
+#include "gvt.h"
+#include "i915_pvinfo.h"
+
+#define FIRMWARE_VERSION (0x0)
+
+struct gvt_firmware_header {
+	u64 magic;
+	u32 crc32;		/* protect the data after this field */
+	u32 version;
+	u64 cfg_space_size;
+	u64 cfg_space_offset;	/* offset in the file */
+	u64 mmio_size;
+	u64 mmio_offset;	/* offset in the file */
+	unsigned char data[];
+};
+
+#define dev_to_drm_minor(d) dev_get_drvdata((d))
+
+static ssize_t
+gvt_firmware_read(struct file *filp, struct kobject *kobj,
+	     struct bin_attribute *attr, char *buf,
+	     loff_t offset, size_t count)
+{
+	memcpy(buf, attr->private + offset, count);
+	return count;
+}
+
+static struct bin_attribute firmware_attr = {
+	.attr = {.name = "gvt_firmware", .mode = (S_IRUSR)},
+	.read = gvt_firmware_read,
+	.write = NULL,
+	.mmap = NULL,
+};
+
+static int expose_firmware_sysfs(struct intel_gvt *gvt)
+{
+	struct intel_gvt_device_info *info = &gvt->device_info;
+	struct drm_i915_private *i915 = gvt->gt->i915;
+	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+	struct gvt_firmware_header *h;
+	void *firmware;
+	void *p;
+	unsigned long size, crc32_start;
+	int ret;
+
+	size = offsetof(struct gvt_firmware_header, data) + info->mmio_size + info->cfg_space_size;
+	firmware = vzalloc(size);
+	if (!firmware)
+		return -ENOMEM;
+
+	h = firmware;
+
+	h->magic = VGT_MAGIC;
+	h->version = FIRMWARE_VERSION;
+	h->cfg_space_size = info->cfg_space_size;
+	h->cfg_space_offset = offsetof(struct gvt_firmware_header, data);
+	h->mmio_size = info->mmio_size;
+	h->mmio_offset = h->cfg_space_offset + h->cfg_space_size;
+
+	p = firmware + h->cfg_space_offset;
+
+	memcpy(gvt->firmware.cfg_space, i915->vgpu.initial_cfg_space,
+	       info->cfg_space_size);
+	memcpy(p, gvt->firmware.cfg_space, info->cfg_space_size);
+
+	p = firmware + h->mmio_offset;
+
+	memcpy(gvt->firmware.mmio, i915->vgpu.initial_mmio,
+	       info->mmio_size);
+
+	memcpy(p, gvt->firmware.mmio, info->mmio_size);
+
+	crc32_start = offsetof(struct gvt_firmware_header, version);
+	h->crc32 = crc32_le(0, firmware + crc32_start, size - crc32_start);
+
+	firmware_attr.size = size;
+	firmware_attr.private = firmware;
+
+	ret = device_create_bin_file(&pdev->dev, &firmware_attr);
+	if (ret) {
+		vfree(firmware);
+		return ret;
+	}
+	return 0;
+}
+
+static void clean_firmware_sysfs(struct intel_gvt *gvt)
+{
+	struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
+
+	device_remove_bin_file(&pdev->dev, &firmware_attr);
+	vfree(firmware_attr.private);
+}
+
+/**
+ * intel_gvt_free_firmware - free GVT firmware
+ * @gvt: intel gvt device
+ *
+ */
+void intel_gvt_free_firmware(struct intel_gvt *gvt)
+{
+	if (!gvt->firmware.firmware_loaded)
+		clean_firmware_sysfs(gvt);
+
+	kfree(gvt->firmware.cfg_space);
+	vfree(gvt->firmware.mmio);
+}
+
+static int verify_firmware(struct intel_gvt *gvt,
+			   const struct firmware *fw)
+{
+	struct intel_gvt_device_info *info = &gvt->device_info;
+	struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
+	struct gvt_firmware_header *h;
+	unsigned long id, crc32_start;
+	const void *mem;
+	const char *item;
+	u64 file, request;
+
+	h = (struct gvt_firmware_header *)fw->data;
+
+	crc32_start = offsetofend(struct gvt_firmware_header, crc32);
+	mem = fw->data + crc32_start;
+
+#define VERIFY(s, a, b) do { \
+	item = (s); file = (u64)(a); request = (u64)(b); \
+	if ((a) != (b)) \
+		goto invalid_firmware; \
+} while (0)
+
+	VERIFY("magic number", h->magic, VGT_MAGIC);
+	VERIFY("version", h->version, FIRMWARE_VERSION);
+	VERIFY("crc32", h->crc32, crc32_le(0, mem, fw->size - crc32_start));
+	VERIFY("cfg space size", h->cfg_space_size, info->cfg_space_size);
+	VERIFY("mmio size", h->mmio_size, info->mmio_size);
+
+	mem = (fw->data + h->cfg_space_offset);
+
+	id = *(u16 *)(mem + PCI_VENDOR_ID);
+	VERIFY("vendor id", id, pdev->vendor);
+
+	id = *(u16 *)(mem + PCI_DEVICE_ID);
+	VERIFY("device id", id, pdev->device);
+
+	id = *(u8 *)(mem + PCI_REVISION_ID);
+	VERIFY("revision id", id, pdev->revision);
+
+#undef VERIFY
+	return 0;
+
+invalid_firmware:
+	gvt_dbg_core("Invalid firmware: %s [file] 0x%llx [request] 0x%llx\n",
+		     item, file, request);
+	return -EINVAL;
+}
+
+#define GVT_FIRMWARE_PATH "i915/gvt"
+
+/**
+ * intel_gvt_load_firmware - load GVT firmware
+ * @gvt: intel gvt device
+ *
+ */
+int intel_gvt_load_firmware(struct intel_gvt *gvt)
+{
+	struct intel_gvt_device_info *info = &gvt->device_info;
+	struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
+	struct intel_gvt_firmware *firmware = &gvt->firmware;
+	struct gvt_firmware_header *h;
+	const struct firmware *fw;
+	char *path;
+	void *mem;
+	int ret;
+
+	path = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	mem = kmalloc(info->cfg_space_size, GFP_KERNEL);
+	if (!mem) {
+		kfree(path);
+		return -ENOMEM;
+	}
+
+	firmware->cfg_space = mem;
+
+	mem = vmalloc(info->mmio_size);
+	if (!mem) {
+		kfree(path);
+		kfree(firmware->cfg_space);
+		return -ENOMEM;
+	}
+
+	firmware->mmio = mem;
+
+	sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%02x.golden_hw_state",
+		 GVT_FIRMWARE_PATH, pdev->vendor, pdev->device,
+		 pdev->revision);
+
+	gvt_dbg_core("request hw state firmware %s...\n", path);
+
+	ret = request_firmware(&fw, path, gvt->gt->i915->drm.dev);
+	kfree(path);
+
+	if (ret)
+		goto expose_firmware;
+
+	gvt_dbg_core("success.\n");
+
+	ret = verify_firmware(gvt, fw);
+	if (ret)
+		goto out_free_fw;
+
+	gvt_dbg_core("verified.\n");
+
+	h = (struct gvt_firmware_header *)fw->data;
+
+	memcpy(firmware->cfg_space, fw->data + h->cfg_space_offset,
+	       h->cfg_space_size);
+	memcpy(firmware->mmio, fw->data + h->mmio_offset,
+	       h->mmio_size);
+
+	release_firmware(fw);
+	firmware->firmware_loaded = true;
+	return 0;
+
+out_free_fw:
+	release_firmware(fw);
+expose_firmware:
+	expose_firmware_sysfs(gvt);
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
new file mode 100644
index 0000000000..094fca9b0e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -0,0 +1,2856 @@
+/*
+ * GTT virtualization
+ *
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *    Zhenyu Wang <zhenyuw@linux.intel.com>
+ *    Xiao Zheng <xiao.zheng@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "gvt.h"
+#include "i915_pvinfo.h"
+#include "trace.h"
+
+#include "gt/intel_gt_regs.h"
+
+#if defined(VERBOSE_DEBUG)
+#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
+#else
+#define gvt_vdbg_mm(fmt, args...)
+#endif
+
+static bool enable_out_of_sync = false;
+static int preallocated_oos_pages = 8192;
+
+/*
+ * validate a gm address and related range size,
+ * translate it to host gm address
+ */
+bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
+{
+	if (size == 0)
+		return vgpu_gmadr_is_valid(vgpu, addr);
+
+	if (vgpu_gmadr_is_aperture(vgpu, addr) &&
+	    vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
+		return true;
+	else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
+		 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
+		return true;
+
+	gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
+		     addr, size);
+	return false;
+}
+
+/* translate a guest gmadr to host gmadr */
+int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+
+	if (drm_WARN(&i915->drm, !vgpu_gmadr_is_valid(vgpu, g_addr),
+		     "invalid guest gmadr %llx\n", g_addr))
+		return -EACCES;
+
+	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
+		*h_addr = vgpu_aperture_gmadr_base(vgpu)
+			  + (g_addr - vgpu_aperture_offset(vgpu));
+	else
+		*h_addr = vgpu_hidden_gmadr_base(vgpu)
+			  + (g_addr - vgpu_hidden_offset(vgpu));
+	return 0;
+}
+
+/* translate a host gmadr to guest gmadr */
+int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+
+	if (drm_WARN(&i915->drm, !gvt_gmadr_is_valid(vgpu->gvt, h_addr),
+		     "invalid host gmadr %llx\n", h_addr))
+		return -EACCES;
+
+	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
+		*g_addr = vgpu_aperture_gmadr_base(vgpu)
+			+ (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
+	else
+		*g_addr = vgpu_hidden_gmadr_base(vgpu)
+			+ (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
+	return 0;
+}
+
+int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
+			     unsigned long *h_index)
+{
+	u64 h_addr;
+	int ret;
+
+	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
+				       &h_addr);
+	if (ret)
+		return ret;
+
+	*h_index = h_addr >> I915_GTT_PAGE_SHIFT;
+	return 0;
+}
+
+int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
+			     unsigned long *g_index)
+{
+	u64 g_addr;
+	int ret;
+
+	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
+				       &g_addr);
+	if (ret)
+		return ret;
+
+	*g_index = g_addr >> I915_GTT_PAGE_SHIFT;
+	return 0;
+}
+
+#define gtt_type_is_entry(type) \
+	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
+	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
+	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)
+
+#define gtt_type_is_pt(type) \
+	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)
+
+#define gtt_type_is_pte_pt(type) \
+	(type == GTT_TYPE_PPGTT_PTE_PT)
+
+#define gtt_type_is_root_pointer(type) \
+	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)
+
+#define gtt_init_entry(e, t, p, v) do { \
+	(e)->type = t; \
+	(e)->pdev = p; \
+	memcpy(&(e)->val64, &v, sizeof(v)); \
+} while (0)
+
+/*
+ * Mappings between GTT_TYPE* enumerations.
+ * Following information can be found according to the given type:
+ * - type of next level page table
+ * - type of entry inside this level page table
+ * - type of entry with PSE set
+ *
+ * If the given type doesn't have such a kind of information,
+ * e.g. give a l4 root entry type, then request to get its PSE type,
+ * give a PTE page table type, then request to get its next level page
+ * table type, as we know l4 root entry doesn't have a PSE bit,
+ * and a PTE page table doesn't have a next level page table type,
+ * GTT_TYPE_INVALID will be returned. This is useful when traversing a
+ * page table.
+ */
+
+struct gtt_type_table_entry {
+	int entry_type;
+	int pt_type;
+	int next_pt_type;
+	int pse_entry_type;
+};
+
+#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
+	[type] = { \
+		.entry_type = e_type, \
+		.pt_type = cpt_type, \
+		.next_pt_type = npt_type, \
+		.pse_entry_type = pse_type, \
+	}
+
+static const struct gtt_type_table_entry gtt_type_table[] = {
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
+			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_PPGTT_PML4_PT,
+			GTT_TYPE_INVALID),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
+			GTT_TYPE_PPGTT_PML4_ENTRY,
+			GTT_TYPE_PPGTT_PML4_PT,
+			GTT_TYPE_PPGTT_PDP_PT,
+			GTT_TYPE_INVALID),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
+			GTT_TYPE_PPGTT_PML4_ENTRY,
+			GTT_TYPE_PPGTT_PML4_PT,
+			GTT_TYPE_PPGTT_PDP_PT,
+			GTT_TYPE_INVALID),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
+			GTT_TYPE_PPGTT_PDP_ENTRY,
+			GTT_TYPE_PPGTT_PDP_PT,
+			GTT_TYPE_PPGTT_PDE_PT,
+			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
+			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_PPGTT_PDE_PT,
+			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
+			GTT_TYPE_PPGTT_PDP_ENTRY,
+			GTT_TYPE_PPGTT_PDP_PT,
+			GTT_TYPE_PPGTT_PDE_PT,
+			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
+			GTT_TYPE_PPGTT_PDE_ENTRY,
+			GTT_TYPE_PPGTT_PDE_PT,
+			GTT_TYPE_PPGTT_PTE_PT,
+			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
+			GTT_TYPE_PPGTT_PDE_ENTRY,
+			GTT_TYPE_PPGTT_PDE_PT,
+			GTT_TYPE_PPGTT_PTE_PT,
+			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
+	/* We take IPS bit as 'PSE' for PTE level. */
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
+			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
+			GTT_TYPE_PPGTT_PTE_PT,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
+			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
+			GTT_TYPE_PPGTT_PTE_PT,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
+			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
+			GTT_TYPE_PPGTT_PTE_PT,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
+			GTT_TYPE_PPGTT_PDE_ENTRY,
+			GTT_TYPE_PPGTT_PDE_PT,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
+			GTT_TYPE_PPGTT_PDP_ENTRY,
+			GTT_TYPE_PPGTT_PDP_PT,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
+	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
+			GTT_TYPE_GGTT_PTE,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_INVALID,
+			GTT_TYPE_INVALID),
+};
+
+static inline int get_next_pt_type(int type)
+{
+	return gtt_type_table[type].next_pt_type;
+}
+
+static inline int get_entry_type(int type)
+{
+	return gtt_type_table[type].entry_type;
+}
+
+static inline int get_pse_type(int type)
+{
+	return gtt_type_table[type].pse_entry_type;
+}
+
+static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
+{
+	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
+
+	return readq(addr);
+}
+
+static void ggtt_invalidate(struct intel_gt *gt)
+{
+	mmio_hw_access_pre(gt);
+	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
+	mmio_hw_access_post(gt);
+}
+
+static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
+{
+	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;
+
+	writeq(pte, addr);
+}
+
+static inline int gtt_get_entry64(void *pt,
+		struct intel_gvt_gtt_entry *e,
+		unsigned long index, bool hypervisor_access, unsigned long gpa,
+		struct intel_vgpu *vgpu)
+{
+	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
+	int ret;
+
+	if (WARN_ON(info->gtt_entry_size != 8))
+		return -EINVAL;
+
+	if (hypervisor_access) {
+		ret = intel_gvt_read_gpa(vgpu, gpa +
+				(index << info->gtt_entry_size_shift),
+				&e->val64, 8);
+		if (WARN_ON(ret))
+			return ret;
+	} else if (!pt) {
+		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
+	} else {
+		e->val64 = *((u64 *)pt + index);
+	}
+	return 0;
+}
+
+static inline int gtt_set_entry64(void *pt,
+		struct intel_gvt_gtt_entry *e,
+		unsigned long index, bool hypervisor_access, unsigned long gpa,
+		struct intel_vgpu *vgpu)
+{
+	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
+	int ret;
+
+	if (WARN_ON(info->gtt_entry_size != 8))
+		return -EINVAL;
+
+	if (hypervisor_access) {
+		ret = intel_gvt_write_gpa(vgpu, gpa +
+				(index << info->gtt_entry_size_shift),
+				&e->val64, 8);
+		if (WARN_ON(ret))
+			return ret;
+	} else if (!pt) {
+		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
+	} else {
+		*((u64 *)pt + index) = e->val64;
+	}
+	return 0;
+}
+
+#define GTT_HAW 46
+
+#define ADDR_1G_MASK	GENMASK_ULL(GTT_HAW - 1, 30)
+#define ADDR_2M_MASK	GENMASK_ULL(GTT_HAW - 1, 21)
+#define ADDR_64K_MASK	GENMASK_ULL(GTT_HAW - 1, 16)
+#define ADDR_4K_MASK	GENMASK_ULL(GTT_HAW - 1, 12)
+
+#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
+#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* splited 64K gtt entry */
+
+#define GTT_64K_PTE_STRIDE 16
+
+static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
+{
+	unsigned long pfn;
+
+	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
+		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
+	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
+		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
+	else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
+		pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
+	else
+		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
+	return pfn;
+}
+
+static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
+{
+	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
+		e->val64 &= ~ADDR_1G_MASK;
+		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
+	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
+		e->val64 &= ~ADDR_2M_MASK;
+		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
+	} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
+		e->val64 &= ~ADDR_64K_MASK;
+		pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
+	} else {
+		e->val64 &= ~ADDR_4K_MASK;
+		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
+	}
+
+	e->val64 |= (pfn << PAGE_SHIFT);
+}
+
+static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
+{
+	return !!(e->val64 & _PAGE_PSE);
+}
+
+static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
+{
+	if (gen8_gtt_test_pse(e)) {
+		switch (e->type) {
+		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
+			e->val64 &= ~_PAGE_PSE;
+			e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
+			break;
+		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
+			e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
+			e->val64 &= ~_PAGE_PSE;
+			break;
+		default:
+			WARN_ON(1);
+		}
+	}
+}
+
+static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
+{
+	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
+		return false;
+
+	return !!(e->val64 & GEN8_PDE_IPS_64K);
+}
+
+static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
+{
+	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
+		return;
+
+	e->val64 &= ~GEN8_PDE_IPS_64K;
+}
+
+static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
+{
+	/*
+	 * i915 writes PDP root pointer registers without present bit,
+	 * it also works, so we need to treat root pointer entry
+	 * specifically.
+	 */
+	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
+			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
+		return (e->val64 != 0);
+	else
+		return (e->val64 & GEN8_PAGE_PRESENT);
+}
+
+static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
+{
+	e->val64 &= ~GEN8_PAGE_PRESENT;
+}
+
+static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
+{
+	e->val64 |= GEN8_PAGE_PRESENT;
+}
+
+static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
+{
+	return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
+}
+
+static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
+{
+	e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
+}
+
+static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
+{
+	e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
+}
+
+/*
+ * Per-platform GMA routines.
+ */
+static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
+{
+	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);
+
+	trace_gma_index(__func__, gma, x);
+	return x;
+}
+
+#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
+static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
+{ \
+	unsigned long x = (exp); \
+	trace_gma_index(__func__, gma, x); \
+	return x; \
+}
+
+DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
+DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
+DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
+DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
+DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
+
+static const struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
+	.get_entry = gtt_get_entry64,
+	.set_entry = gtt_set_entry64,
+	.clear_present = gtt_entry_clear_present,
+	.set_present = gtt_entry_set_present,
+	.test_present = gen8_gtt_test_present,
+	.test_pse = gen8_gtt_test_pse,
+	.clear_pse = gen8_gtt_clear_pse,
+	.clear_ips = gen8_gtt_clear_ips,
+	.test_ips = gen8_gtt_test_ips,
+	.clear_64k_splited = gen8_gtt_clear_64k_splited,
+	.set_64k_splited = gen8_gtt_set_64k_splited,
+	.test_64k_splited = gen8_gtt_test_64k_splited,
+	.get_pfn = gen8_gtt_get_pfn,
+	.set_pfn = gen8_gtt_set_pfn,
+};
+
+static const struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
+	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
+	.gma_to_pte_index = gen8_gma_to_pte_index,
+	.gma_to_pde_index = gen8_gma_to_pde_index,
+	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
+	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
+	.gma_to_pml4_index = gen8_gma_to_pml4_index,
+};
+
+/* Update entry type per pse and ips bit. */
+static void update_entry_type_for_real(const struct intel_gvt_gtt_pte_ops *pte_ops,
+	struct intel_gvt_gtt_entry *entry, bool ips)
+{
+	switch (entry->type) {
+	case GTT_TYPE_PPGTT_PDE_ENTRY:
+	case GTT_TYPE_PPGTT_PDP_ENTRY:
+		if (pte_ops->test_pse(entry))
+			entry->type = get_pse_type(entry->type);
+		break;
+	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
+		if (ips)
+			entry->type = get_pse_type(entry->type);
+		break;
+	default:
+		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
+	}
+
+	GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
+}
+
+/*
+ * MM helpers.
+ */
+static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index,
+		bool guest)
+{
+	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
+
+	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);
+
+	entry->type = mm->ppgtt_mm.root_entry_type;
+	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
+			   mm->ppgtt_mm.shadow_pdps,
+			   entry, index, false, 0, mm->vgpu);
+	update_entry_type_for_real(pte_ops, entry, false);
+}
+
+static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index)
+{
+	_ppgtt_get_root_entry(mm, entry, index, true);
+}
+
+static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index)
+{
+	_ppgtt_get_root_entry(mm, entry, index, false);
+}
+
+static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index,
+		bool guest)
+{
+	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
+
+	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
+			   mm->ppgtt_mm.shadow_pdps,
+			   entry, index, false, 0, mm->vgpu);
+}
+
+static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index)
+{
+	_ppgtt_set_root_entry(mm, entry, index, false);
+}
+
+static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index)
+{
+	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
+
+	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
+
+	entry->type = GTT_TYPE_GGTT_PTE;
+	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
+			   false, 0, mm->vgpu);
+}
+
+static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index)
+{
+	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
+
+	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
+
+	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
+			   false, 0, mm->vgpu);
+}
+
+static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index)
+{
+	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
+
+	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
+
+	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
+}
+
+static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *entry, unsigned long index)
+{
+	const struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
+	unsigned long offset = index;
+
+	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);
+
+	if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
+		offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
+		mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
+	} else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
+		offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
+		mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
+	}
+
+	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
+}
+
+/*
+ * PPGTT shadow page table helpers.
+ */
+static inline int ppgtt_spt_get_entry(
+		struct intel_vgpu_ppgtt_spt *spt,
+		void *page_table, int type,
+		struct intel_gvt_gtt_entry *e, unsigned long index,
+		bool guest)
+{
+	struct intel_gvt *gvt = spt->vgpu->gvt;
+	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
+	int ret;
+
+	e->type = get_entry_type(type);
+
+	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
+		return -EINVAL;
+
+	ret = ops->get_entry(page_table, e, index, guest,
+			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
+			spt->vgpu);
+	if (ret)
+		return ret;
+
+	update_entry_type_for_real(ops, e, guest ?
+				   spt->guest_page.pde_ips : false);
+
+	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
+		    type, e->type, index, e->val64);
+	return 0;
+}
+
+static inline int ppgtt_spt_set_entry(
+		struct intel_vgpu_ppgtt_spt *spt,
+		void *page_table, int type,
+		struct intel_gvt_gtt_entry *e, unsigned long index,
+		bool guest)
+{
+	struct intel_gvt *gvt = spt->vgpu->gvt;
+	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
+
+	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
+		return -EINVAL;
+
+	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
+		    type, e->type, index, e->val64);
+
+	return ops->set_entry(page_table, e, index, guest,
+			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
+			spt->vgpu);
+}
+
+#define ppgtt_get_guest_entry(spt, e, index) \
+	ppgtt_spt_get_entry(spt, NULL, \
+		spt->guest_page.type, e, index, true)
+
+#define ppgtt_set_guest_entry(spt, e, index) \
+	ppgtt_spt_set_entry(spt, NULL, \
+		spt->guest_page.type, e, index, true)
+
+#define ppgtt_get_shadow_entry(spt, e, index) \
+	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
+		spt->shadow_page.type, e, index, false)
+
+#define ppgtt_set_shadow_entry(spt, e, index) \
+	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
+		spt->shadow_page.type, e, index, false)
+
+static void *alloc_spt(gfp_t gfp_mask)
+{
+	struct intel_vgpu_ppgtt_spt *spt;
+
+	spt = kzalloc(sizeof(*spt), gfp_mask);
+	if (!spt)
+		return NULL;
+
+	spt->shadow_page.page = alloc_page(gfp_mask);
+	if (!spt->shadow_page.page) {
+		kfree(spt);
+		return NULL;
+	}
+	return spt;
+}
+
+static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
+{
+	__free_page(spt->shadow_page.page);
+	kfree(spt);
+}
+
+static int detach_oos_page(struct intel_vgpu *vgpu,
+		struct intel_vgpu_oos_page *oos_page);
+
+static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
+{
+	struct device *kdev = spt->vgpu->gvt->gt->i915->drm.dev;
+
+	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);
+
+	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
+		       DMA_BIDIRECTIONAL);
+
+	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);
+
+	if (spt->guest_page.gfn) {
+		if (spt->guest_page.oos_page)
+			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);
+
+		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
+	}
+
+	list_del_init(&spt->post_shadow_list);
+	free_spt(spt);
+}
+
+static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_ppgtt_spt *spt, *spn;
+	struct radix_tree_iter iter;
+	LIST_HEAD(all_spt);
+	void __rcu **slot;
+
+	rcu_read_lock();
+	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
+		spt = radix_tree_deref_slot(slot);
+		list_move(&spt->post_shadow_list, &all_spt);
+	}
+	rcu_read_unlock();
+
+	list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
+		ppgtt_free_spt(spt);
+}
+
+static int ppgtt_handle_guest_write_page_table_bytes(
+		struct intel_vgpu_ppgtt_spt *spt,
+		u64 pa, void *p_data, int bytes);
+
+static int ppgtt_write_protection_handler(
+		struct intel_vgpu_page_track *page_track,
+		u64 gpa, void *data, int bytes)
+{
+	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;
+
+	int ret;
+
+	if (bytes != 4 && bytes != 8)
+		return -EINVAL;
+
+	ret = ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
+	if (ret)
+		return ret;
+	return ret;
+}
+
+/* Find a spt by guest gfn. */
+static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
+		struct intel_vgpu *vgpu, unsigned long gfn)
+{
+	struct intel_vgpu_page_track *track;
+
+	track = intel_vgpu_find_page_track(vgpu, gfn);
+	if (track && track->handler == ppgtt_write_protection_handler)
+		return track->priv_data;
+
+	return NULL;
+}
+
+/* Find the spt by shadow page mfn. */
+static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
+		struct intel_vgpu *vgpu, unsigned long mfn)
+{
+	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
+}
+
+static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);
+
+/* Allocate shadow page table without guest page. */
+static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
+		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
+{
+	struct device *kdev = vgpu->gvt->gt->i915->drm.dev;
+	struct intel_vgpu_ppgtt_spt *spt = NULL;
+	dma_addr_t daddr;
+	int ret;
+
+retry:
+	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
+	if (!spt) {
+		if (reclaim_one_ppgtt_mm(vgpu->gvt))
+			goto retry;
+
+		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spt->vgpu = vgpu;
+	atomic_set(&spt->refcount, 1);
+	INIT_LIST_HEAD(&spt->post_shadow_list);
+
+	/*
+	 * Init shadow_page.
+	 */
+	spt->shadow_page.type = type;
+	daddr = dma_map_page(kdev, spt->shadow_page.page,
+			     0, 4096, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(kdev, daddr)) {
+		gvt_vgpu_err("fail to map dma addr\n");
+		ret = -EINVAL;
+		goto err_free_spt;
+	}
+	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
+	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;
+
+	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
+	if (ret)
+		goto err_unmap_dma;
+
+	return spt;
+
+err_unmap_dma:
+	dma_unmap_page(kdev, daddr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+err_free_spt:
+	free_spt(spt);
+	return ERR_PTR(ret);
+}
+
+/* Allocate shadow page table associated with specific gfn. */
+static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
+		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
+		unsigned long gfn, bool guest_pde_ips)
+{
+	struct intel_vgpu_ppgtt_spt *spt;
+	int ret;
+
+	spt = ppgtt_alloc_spt(vgpu, type);
+	if (IS_ERR(spt))
+		return spt;
+
+	/*
+	 * Init guest_page.
+	 */
+	ret = intel_vgpu_register_page_track(vgpu, gfn,
+			ppgtt_write_protection_handler, spt);
+	if (ret) {
+		ppgtt_free_spt(spt);
+		return ERR_PTR(ret);
+	}
+
+	spt->guest_page.type = type;
+	spt->guest_page.gfn = gfn;
+	spt->guest_page.pde_ips = guest_pde_ips;
+
+	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);
+
+	return spt;
+}
+
+#define pt_entry_size_shift(spt) \
+	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)
+
+#define pt_entries(spt) \
+	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))
+
+#define for_each_present_guest_entry(spt, e, i) \
+	for (i = 0; i < pt_entries(spt); \
+	     i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
+		if (!ppgtt_get_guest_entry(spt, e, i) && \
+		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
+
+#define for_each_present_shadow_entry(spt, e, i) \
+	for (i = 0; i < pt_entries(spt); \
+	     i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
+		if (!ppgtt_get_shadow_entry(spt, e, i) && \
+		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))
+
+#define for_each_shadow_entry(spt, e, i) \
+	for (i = 0; i < pt_entries(spt); \
+	     i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
+		if (!ppgtt_get_shadow_entry(spt, e, i))
+
+static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
+{
+	int v = atomic_read(&spt->refcount);
+
+	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
+	atomic_inc(&spt->refcount);
+}
+
+static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
+{
+	int v = atomic_read(&spt->refcount);
+
+	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
+	return atomic_dec_return(&spt->refcount);
+}
+
+static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);
+
+static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
+		struct intel_gvt_gtt_entry *e)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_vgpu_ppgtt_spt *s;
+	enum intel_gvt_gtt_type cur_pt_type;
+
+	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));
+
+	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
+		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+		cur_pt_type = get_next_pt_type(e->type);
+
+		if (!gtt_type_is_pt(cur_pt_type) ||
+				!gtt_type_is_pt(cur_pt_type + 1)) {
+			drm_WARN(&i915->drm, 1,
+				 "Invalid page table type, cur_pt_type is: %d\n",
+				 cur_pt_type);
+			return -EINVAL;
+		}
+
+		cur_pt_type += 1;
+
+		if (ops->get_pfn(e) ==
+			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
+			return 0;
+	}
+	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
+	if (!s) {
+		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
+				ops->get_pfn(e));
+		return -ENXIO;
+	}
+	return ppgtt_invalidate_spt(s);
+}
+
+static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
+		struct intel_gvt_gtt_entry *entry)
+{
+	struct intel_vgpu *vgpu = spt->vgpu;
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	unsigned long pfn;
+	int type;
+
+	pfn = ops->get_pfn(entry);
+	type = spt->shadow_page.type;
+
+	/* Uninitialized spte or unshadowed spte. */
+	if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
+		return;
+
+	intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
+}
+
+static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
+{
+	struct intel_vgpu *vgpu = spt->vgpu;
+	struct intel_gvt_gtt_entry e;
+	unsigned long index;
+	int ret;
+
+	trace_spt_change(spt->vgpu->id, "die", spt,
+			spt->guest_page.gfn, spt->shadow_page.type);
+
+	if (ppgtt_put_spt(spt) > 0)
+		return 0;
+
+	for_each_present_shadow_entry(spt, &e, index) {
+		switch (e.type) {
+		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
+			gvt_vdbg_mm("invalidate 4K entry\n");
+			ppgtt_invalidate_pte(spt, &e);
+			break;
+		case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
+			/* We don't setup 64K shadow entry so far. */
+			WARN(1, "suspicious 64K gtt entry\n");
+			continue;
+		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
+			gvt_vdbg_mm("invalidate 2M entry\n");
+			continue;
+		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
+			WARN(1, "GVT doesn't support 1GB page\n");
+			continue;
+		case GTT_TYPE_PPGTT_PML4_ENTRY:
+		case GTT_TYPE_PPGTT_PDP_ENTRY:
+		case GTT_TYPE_PPGTT_PDE_ENTRY:
+			gvt_vdbg_mm("invalidate PMUL4/PDP/PDE entry\n");
+			ret = ppgtt_invalidate_spt_by_shadow_entry(
+					spt->vgpu, &e);
+			if (ret)
+				goto fail;
+			break;
+		default:
+			GEM_BUG_ON(1);
+		}
+	}
+
+	trace_spt_change(spt->vgpu->id, "release", spt,
+			 spt->guest_page.gfn, spt->shadow_page.type);
+	ppgtt_free_spt(spt);
+	return 0;
+fail:
+	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
+			spt, e.val64, e.type);
+	return ret;
+}
+
+static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+
+	if (GRAPHICS_VER(dev_priv) == 9) {
+		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
+			GAMW_ECO_ENABLE_64K_IPS_FIELD;
+
+		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
+	} else if (GRAPHICS_VER(dev_priv) >= 11) {
+		/* 64K paging only controlled by IPS bit in PTE now. */
+		return true;
+	} else
+		return false;
+}
+
+static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);
+
+static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
+		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
+{
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_vgpu_ppgtt_spt *spt = NULL;
+	bool ips = false;
+	int ret;
+
+	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));
+
+	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
+		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);
+
+	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
+	if (spt) {
+		ppgtt_get_spt(spt);
+
+		if (ips != spt->guest_page.pde_ips) {
+			spt->guest_page.pde_ips = ips;
+
+			gvt_dbg_mm("reshadow PDE since ips changed\n");
+			clear_page(spt->shadow_page.vaddr);
+			ret = ppgtt_populate_spt(spt);
+			if (ret) {
+				ppgtt_put_spt(spt);
+				goto err;
+			}
+		}
+	} else {
+		int type = get_next_pt_type(we->type);
+
+		if (!gtt_type_is_pt(type)) {
+			ret = -EINVAL;
+			goto err;
+		}
+
+		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
+		if (IS_ERR(spt)) {
+			ret = PTR_ERR(spt);
+			goto err;
+		}
+
+		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
+		if (ret)
+			goto err_free_spt;
+
+		ret = ppgtt_populate_spt(spt);
+		if (ret)
+			goto err_free_spt;
+
+		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
+				 spt->shadow_page.type);
+	}
+	return spt;
+
+err_free_spt:
+	ppgtt_free_spt(spt);
+	spt = NULL;
+err:
+	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+		     spt, we->val64, we->type);
+	return ERR_PTR(ret);
+}
+
+static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
+		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
+{
+	const struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;
+
+	se->type = ge->type;
+	se->val64 = ge->val64;
+
+	/* Because we always split 64KB pages, so clear IPS in shadow PDE. */
+	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
+		ops->clear_ips(se);
+
+	ops->set_pfn(se, s->shadow_page.mfn);
+}
+
+static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
+	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
+	struct intel_gvt_gtt_entry *se)
+{
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_vgpu_ppgtt_spt *sub_spt;
+	struct intel_gvt_gtt_entry sub_se;
+	unsigned long start_gfn;
+	dma_addr_t dma_addr;
+	unsigned long sub_index;
+	int ret;
+
+	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);
+
+	start_gfn = ops->get_pfn(se);
+
+	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
+	if (IS_ERR(sub_spt))
+		return PTR_ERR(sub_spt);
+
+	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
+		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + sub_index,
+						   PAGE_SIZE, &dma_addr);
+		if (ret)
+			goto err;
+		sub_se.val64 = se->val64;
+
+		/* Copy the PAT field from PDE. */
+		sub_se.val64 &= ~_PAGE_PAT;
+		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;
+
+		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
+		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
+	}
+
+	/* Clear dirty field. */
+	se->val64 &= ~_PAGE_DIRTY;
+
+	ops->clear_pse(se);
+	ops->clear_ips(se);
+	ops->set_pfn(se, sub_spt->shadow_page.mfn);
+	ppgtt_set_shadow_entry(spt, se, index);
+	return 0;
+err:
+	/* Cancel the existing addess mappings of DMA addr. */
+	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
+		gvt_vdbg_mm("invalidate 4K entry\n");
+		ppgtt_invalidate_pte(sub_spt, &sub_se);
+	}
+	/* Release the new allocated spt. */
+	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
+		sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
+	ppgtt_free_spt(sub_spt);
+	return ret;
+}
+
+static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
+	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
+	struct intel_gvt_gtt_entry *se)
+{
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_gvt_gtt_entry entry = *se;
+	unsigned long start_gfn;
+	dma_addr_t dma_addr;
+	int i, ret;
+
+	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);
+
+	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);
+
+	start_gfn = ops->get_pfn(se);
+
+	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
+	ops->set_64k_splited(&entry);
+
+	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
+		ret = intel_gvt_dma_map_guest_page(vgpu, start_gfn + i,
+						   PAGE_SIZE, &dma_addr);
+		if (ret)
+			return ret;
+
+		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
+		ppgtt_set_shadow_entry(spt, &entry, index + i);
+	}
+	return 0;
+}
+
+static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
+	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
+	struct intel_gvt_gtt_entry *ge)
+{
+	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_gvt_gtt_entry se = *ge;
+	unsigned long gfn;
+	dma_addr_t dma_addr;
+	int ret;
+
+	if (!pte_ops->test_present(ge))
+		return 0;
+
+	gfn = pte_ops->get_pfn(ge);
+
+	switch (ge->type) {
+	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
+		gvt_vdbg_mm("shadow 4K gtt entry\n");
+		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE, &dma_addr);
+		if (ret)
+			return -ENXIO;
+		break;
+	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
+		gvt_vdbg_mm("shadow 64K gtt entry\n");
+		/*
+		 * The layout of 64K page is special, the page size is
+		 * controlled by uper PDE. To be simple, we always split
+		 * 64K page to smaller 4K pages in shadow PT.
+		 */
+		return split_64KB_gtt_entry(vgpu, spt, index, &se);
+	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
+		gvt_vdbg_mm("shadow 2M gtt entry\n");
+		if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M) ||
+		    intel_gvt_dma_map_guest_page(vgpu, gfn,
+						 I915_GTT_PAGE_SIZE_2M, &dma_addr))
+			return split_2MB_gtt_entry(vgpu, spt, index, &se);
+		break;
+	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
+		gvt_vgpu_err("GVT doesn't support 1GB entry\n");
+		return -EINVAL;
+	default:
+		GEM_BUG_ON(1);
+		return -EINVAL;
+	}
+
+	/* Successfully shadowed a 4K or 2M page (without splitting). */
+	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
+	ppgtt_set_shadow_entry(spt, &se, index);
+	return 0;
+}
+
+static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
+{
+	struct intel_vgpu *vgpu = spt->vgpu;
+	struct intel_vgpu_ppgtt_spt *s;
+	struct intel_gvt_gtt_entry se, ge;
+	unsigned long i;
+	int ret;
+
+	trace_spt_change(spt->vgpu->id, "born", spt,
+			 spt->guest_page.gfn, spt->shadow_page.type);
+
+	for_each_present_guest_entry(spt, &ge, i) {
+		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
+			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
+			if (IS_ERR(s)) {
+				ret = PTR_ERR(s);
+				goto fail;
+			}
+			ppgtt_get_shadow_entry(spt, &se, i);
+			ppgtt_generate_shadow_entry(&se, s, &ge);
+			ppgtt_set_shadow_entry(spt, &se, i);
+		} else {
+			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
+			if (ret)
+				goto fail;
+		}
+	}
+	return 0;
+fail:
+	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+			spt, ge.val64, ge.type);
+	return ret;
+}
+
+static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
+		struct intel_gvt_gtt_entry *se, unsigned long index)
+{
+	struct intel_vgpu *vgpu = spt->vgpu;
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	int ret;
+
+	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
+			       spt->shadow_page.type, se->val64, index);
+
+	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
+		    se->type, index, se->val64);
+
+	if (!ops->test_present(se))
+		return 0;
+
+	if (ops->get_pfn(se) ==
+	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
+		return 0;
+
+	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
+		struct intel_vgpu_ppgtt_spt *s =
+			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
+		if (!s) {
+			gvt_vgpu_err("fail to find guest page\n");
+			ret = -ENXIO;
+			goto fail;
+		}
+		ret = ppgtt_invalidate_spt(s);
+		if (ret)
+			goto fail;
+	} else {
+		/* We don't setup 64K shadow entry so far. */
+		WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
+		     "suspicious 64K entry\n");
+		ppgtt_invalidate_pte(spt, se);
+	}
+
+	return 0;
+fail:
+	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
+			spt, se->val64, se->type);
+	return ret;
+}
+
+static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
+		struct intel_gvt_gtt_entry *we, unsigned long index)
+{
+	struct intel_vgpu *vgpu = spt->vgpu;
+	struct intel_gvt_gtt_entry m;
+	struct intel_vgpu_ppgtt_spt *s;
+	int ret;
+
+	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
+			       we->val64, index);
+
+	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
+		    we->type, index, we->val64);
+
+	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
+		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
+		if (IS_ERR(s)) {
+			ret = PTR_ERR(s);
+			goto fail;
+		}
+		ppgtt_get_shadow_entry(spt, &m, index);
+		ppgtt_generate_shadow_entry(&m, s, we);
+		ppgtt_set_shadow_entry(spt, &m, index);
+	} else {
+		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
+		if (ret)
+			goto fail;
+	}
+	return 0;
+fail:
+	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
+		spt, we->val64, we->type);
+	return ret;
+}
+
+static int sync_oos_page(struct intel_vgpu *vgpu,
+		struct intel_vgpu_oos_page *oos_page)
+{
+	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
+	struct intel_gvt *gvt = vgpu->gvt;
+	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
+	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
+	struct intel_gvt_gtt_entry old, new;
+	int index;
+	int ret;
+
+	trace_oos_change(vgpu->id, "sync", oos_page->id,
+			 spt, spt->guest_page.type);
+
+	old.type = new.type = get_entry_type(spt->guest_page.type);
+	old.val64 = new.val64 = 0;
+
+	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
+				info->gtt_entry_size_shift); index++) {
+		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
+		ops->get_entry(NULL, &new, index, true,
+			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);
+
+		if (old.val64 == new.val64
+			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
+			continue;
+
+		trace_oos_sync(vgpu->id, oos_page->id,
+				spt, spt->guest_page.type,
+				new.val64, index);
+
+		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
+		if (ret)
+			return ret;
+
+		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
+	}
+
+	spt->guest_page.write_cnt = 0;
+	list_del_init(&spt->post_shadow_list);
+	return 0;
+}
+
+static int detach_oos_page(struct intel_vgpu *vgpu,
+		struct intel_vgpu_oos_page *oos_page)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
+
+	trace_oos_change(vgpu->id, "detach", oos_page->id,
+			 spt, spt->guest_page.type);
+
+	spt->guest_page.write_cnt = 0;
+	spt->guest_page.oos_page = NULL;
+	oos_page->spt = NULL;
+
+	list_del_init(&oos_page->vm_list);
+	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);
+
+	return 0;
+}
+
+static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
+		struct intel_vgpu_ppgtt_spt *spt)
+{
+	struct intel_gvt *gvt = spt->vgpu->gvt;
+	int ret;
+
+	ret = intel_gvt_read_gpa(spt->vgpu,
+			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
+			oos_page->mem, I915_GTT_PAGE_SIZE);
+	if (ret)
+		return ret;
+
+	oos_page->spt = spt;
+	spt->guest_page.oos_page = oos_page;
+
+	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);
+
+	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
+			 spt, spt->guest_page.type);
+	return 0;
+}
+
+static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
+{
+	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
+	int ret;
+
+	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
+	if (ret)
+		return ret;
+
+	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
+			 spt, spt->guest_page.type);
+
+	list_del_init(&oos_page->vm_list);
+	return sync_oos_page(spt->vgpu, oos_page);
+}
+
+static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
+{
+	struct intel_gvt *gvt = spt->vgpu->gvt;
+	struct intel_gvt_gtt *gtt = &gvt->gtt;
+	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
+	int ret;
+
+	WARN(oos_page, "shadow PPGTT page has already has a oos page\n");
+
+	if (list_empty(&gtt->oos_page_free_list_head)) {
+		oos_page = container_of(gtt->oos_page_use_list_head.next,
+			struct intel_vgpu_oos_page, list);
+		ret = ppgtt_set_guest_page_sync(oos_page->spt);
+		if (ret)
+			return ret;
+		ret = detach_oos_page(spt->vgpu, oos_page);
+		if (ret)
+			return ret;
+	} else
+		oos_page = container_of(gtt->oos_page_free_list_head.next,
+			struct intel_vgpu_oos_page, list);
+	return attach_oos_page(oos_page, spt);
+}
+
+static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
+{
+	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
+
+	if (WARN(!oos_page, "shadow PPGTT page should have a oos page\n"))
+		return -EINVAL;
+
+	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
+			 spt, spt->guest_page.type);
+
+	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
+	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
+}
+
+/**
+ * intel_vgpu_sync_oos_pages - sync all the out-of-synced shadow for vGPU
+ * @vgpu: a vGPU
+ *
+ * This function is called before submitting a guest workload to host,
+ * to sync all the out-of-synced shadow for vGPU
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
+{
+	struct list_head *pos, *n;
+	struct intel_vgpu_oos_page *oos_page;
+	int ret;
+
+	if (!enable_out_of_sync)
+		return 0;
+
+	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
+		oos_page = container_of(pos,
+				struct intel_vgpu_oos_page, vm_list);
+		ret = ppgtt_set_guest_page_sync(oos_page->spt);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * The heart of PPGTT shadow page table.
+ */
+static int ppgtt_handle_guest_write_page_table(
+		struct intel_vgpu_ppgtt_spt *spt,
+		struct intel_gvt_gtt_entry *we, unsigned long index)
+{
+	struct intel_vgpu *vgpu = spt->vgpu;
+	int type = spt->shadow_page.type;
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_gvt_gtt_entry old_se;
+	int new_present;
+	int i, ret;
+
+	new_present = ops->test_present(we);
+
+	/*
+	 * Adding the new entry first and then removing the old one, that can
+	 * guarantee the ppgtt table is validated during the window between
+	 * adding and removal.
+	 */
+	ppgtt_get_shadow_entry(spt, &old_se, index);
+
+	if (new_present) {
+		ret = ppgtt_handle_guest_entry_add(spt, we, index);
+		if (ret)
+			goto fail;
+	}
+
+	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
+	if (ret)
+		goto fail;
+
+	if (!new_present) {
+		/* For 64KB splited entries, we need clear them all. */
+		if (ops->test_64k_splited(&old_se) &&
+		    !(index % GTT_64K_PTE_STRIDE)) {
+			gvt_vdbg_mm("remove splited 64K shadow entries\n");
+			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
+				ops->clear_64k_splited(&old_se);
+				ops->set_pfn(&old_se,
+					vgpu->gtt.scratch_pt[type].page_mfn);
+				ppgtt_set_shadow_entry(spt, &old_se, index + i);
+			}
+		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
+			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
+			ops->clear_pse(&old_se);
+			ops->set_pfn(&old_se,
+				     vgpu->gtt.scratch_pt[type].page_mfn);
+			ppgtt_set_shadow_entry(spt, &old_se, index);
+		} else {
+			ops->set_pfn(&old_se,
+				     vgpu->gtt.scratch_pt[type].page_mfn);
+			ppgtt_set_shadow_entry(spt, &old_se, index);
+		}
+	}
+
+	return 0;
+fail:
+	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
+			spt, we->val64, we->type);
+	return ret;
+}
+
+
+
+static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
+{
+	return enable_out_of_sync
+		&& gtt_type_is_pte_pt(spt->guest_page.type)
+		&& spt->guest_page.write_cnt >= 2;
+}
+
+static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
+		unsigned long index)
+{
+	set_bit(index, spt->post_shadow_bitmap);
+	if (!list_empty(&spt->post_shadow_list))
+		return;
+
+	list_add_tail(&spt->post_shadow_list,
+			&spt->vgpu->gtt.post_shadow_list_head);
+}
+
+/**
+ * intel_vgpu_flush_post_shadow - flush the post shadow transactions
+ * @vgpu: a vGPU
+ *
+ * This function is called before submitting a guest workload to host,
+ * to flush all the post shadows for a vGPU.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
+{
+	struct list_head *pos, *n;
+	struct intel_vgpu_ppgtt_spt *spt;
+	struct intel_gvt_gtt_entry ge;
+	unsigned long index;
+	int ret;
+
+	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
+		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
+				post_shadow_list);
+
+		for_each_set_bit(index, spt->post_shadow_bitmap,
+				GTT_ENTRY_NUM_IN_ONE_PAGE) {
+			ppgtt_get_guest_entry(spt, &ge, index);
+
+			ret = ppgtt_handle_guest_write_page_table(spt,
+							&ge, index);
+			if (ret)
+				return ret;
+			clear_bit(index, spt->post_shadow_bitmap);
+		}
+		list_del_init(&spt->post_shadow_list);
+	}
+	return 0;
+}
+
+static int ppgtt_handle_guest_write_page_table_bytes(
+		struct intel_vgpu_ppgtt_spt *spt,
+		u64 pa, void *p_data, int bytes)
+{
+	struct intel_vgpu *vgpu = spt->vgpu;
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
+	struct intel_gvt_gtt_entry we, se;
+	unsigned long index;
+	int ret;
+
+	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;
+
+	ppgtt_get_guest_entry(spt, &we, index);
+
+	/*
+	 * For page table which has 64K gtt entry, only PTE#0, PTE#16,
+	 * PTE#32, ... PTE#496 are used. Unused PTEs update should be
+	 * ignored.
+	 */
+	if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
+	    (index % GTT_64K_PTE_STRIDE)) {
+		gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
+			    index);
+		return 0;
+	}
+
+	if (bytes == info->gtt_entry_size) {
+		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
+		if (ret)
+			return ret;
+	} else {
+		if (!test_bit(index, spt->post_shadow_bitmap)) {
+			int type = spt->shadow_page.type;
+
+			ppgtt_get_shadow_entry(spt, &se, index);
+			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
+			if (ret)
+				return ret;
+			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
+			ppgtt_set_shadow_entry(spt, &se, index);
+		}
+		ppgtt_set_post_shadow(spt, index);
+	}
+
+	if (!enable_out_of_sync)
+		return 0;
+
+	spt->guest_page.write_cnt++;
+
+	if (spt->guest_page.oos_page)
+		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
+				false, 0, vgpu);
+
+	if (can_do_out_of_sync(spt)) {
+		if (!spt->guest_page.oos_page)
+			ppgtt_allocate_oos_page(spt);
+
+		ret = ppgtt_set_guest_page_oos(spt);
+		if (ret < 0)
+			return ret;
+	}
+	return 0;
+}
+
+static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
+{
+	struct intel_vgpu *vgpu = mm->vgpu;
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gvt_gtt *gtt = &gvt->gtt;
+	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
+	struct intel_gvt_gtt_entry se;
+	int index;
+
+	if (!mm->ppgtt_mm.shadowed)
+		return;
+
+	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
+		ppgtt_get_shadow_root_entry(mm, &se, index);
+
+		if (!ops->test_present(&se))
+			continue;
+
+		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
+		se.val64 = 0;
+		ppgtt_set_shadow_root_entry(mm, &se, index);
+
+		trace_spt_guest_change(vgpu->id, "destroy root pointer",
+				       NULL, se.type, se.val64, index);
+	}
+
+	mm->ppgtt_mm.shadowed = false;
+}
+
+
+static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
+{
+	struct intel_vgpu *vgpu = mm->vgpu;
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gvt_gtt *gtt = &gvt->gtt;
+	const struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
+	struct intel_vgpu_ppgtt_spt *spt;
+	struct intel_gvt_gtt_entry ge, se;
+	int index, ret;
+
+	if (mm->ppgtt_mm.shadowed)
+		return 0;
+
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+		return -EINVAL;
+
+	mm->ppgtt_mm.shadowed = true;
+
+	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
+		ppgtt_get_guest_root_entry(mm, &ge, index);
+
+		if (!ops->test_present(&ge))
+			continue;
+
+		trace_spt_guest_change(vgpu->id, __func__, NULL,
+				       ge.type, ge.val64, index);
+
+		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
+		if (IS_ERR(spt)) {
+			gvt_vgpu_err("fail to populate guest root pointer\n");
+			ret = PTR_ERR(spt);
+			goto fail;
+		}
+		ppgtt_generate_shadow_entry(&se, spt, &ge);
+		ppgtt_set_shadow_root_entry(mm, &se, index);
+
+		trace_spt_guest_change(vgpu->id, "populate root pointer",
+				       NULL, se.type, se.val64, index);
+	}
+
+	return 0;
+fail:
+	invalidate_ppgtt_mm(mm);
+	return ret;
+}
+
+static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_mm *mm;
+
+	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
+	if (!mm)
+		return NULL;
+
+	mm->vgpu = vgpu;
+	kref_init(&mm->ref);
+	atomic_set(&mm->pincount, 0);
+
+	return mm;
+}
+
+static void vgpu_free_mm(struct intel_vgpu_mm *mm)
+{
+	kfree(mm);
+}
+
+/**
+ * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
+ * @vgpu: a vGPU
+ * @root_entry_type: ppgtt root entry type
+ * @pdps: guest pdps.
+ *
+ * This function is used to create a ppgtt mm object for a vGPU.
+ *
+ * Returns:
+ * Zero on success, negative error code in pointer if failed.
+ */
+struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_vgpu_mm *mm;
+	int ret;
+
+	mm = vgpu_alloc_mm(vgpu);
+	if (!mm)
+		return ERR_PTR(-ENOMEM);
+
+	mm->type = INTEL_GVT_MM_PPGTT;
+
+	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
+		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
+	mm->ppgtt_mm.root_entry_type = root_entry_type;
+
+	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
+	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
+	INIT_LIST_HEAD(&mm->ppgtt_mm.link);
+
+	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
+		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
+	else
+		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
+		       sizeof(mm->ppgtt_mm.guest_pdps));
+
+	ret = shadow_ppgtt_mm(mm);
+	if (ret) {
+		gvt_vgpu_err("failed to shadow ppgtt mm\n");
+		vgpu_free_mm(mm);
+		return ERR_PTR(ret);
+	}
+
+	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
+
+	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
+	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
+	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
+
+	return mm;
+}
+
+static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_mm *mm;
+	unsigned long nr_entries;
+
+	mm = vgpu_alloc_mm(vgpu);
+	if (!mm)
+		return ERR_PTR(-ENOMEM);
+
+	mm->type = INTEL_GVT_MM_GGTT;
+
+	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
+	mm->ggtt_mm.virtual_ggtt =
+		vzalloc(array_size(nr_entries,
+				   vgpu->gvt->device_info.gtt_entry_size));
+	if (!mm->ggtt_mm.virtual_ggtt) {
+		vgpu_free_mm(mm);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
+	if (!mm->ggtt_mm.host_ggtt_aperture) {
+		vfree(mm->ggtt_mm.virtual_ggtt);
+		vgpu_free_mm(mm);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
+	if (!mm->ggtt_mm.host_ggtt_hidden) {
+		vfree(mm->ggtt_mm.host_ggtt_aperture);
+		vfree(mm->ggtt_mm.virtual_ggtt);
+		vgpu_free_mm(mm);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	return mm;
+}
+
+/**
+ * _intel_vgpu_mm_release - destroy a mm object
+ * @mm_ref: a kref object
+ *
+ * This function is used to destroy a mm object for vGPU
+ *
+ */
+void _intel_vgpu_mm_release(struct kref *mm_ref)
+{
+	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
+
+	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
+		gvt_err("vgpu mm pin count bug detected\n");
+
+	if (mm->type == INTEL_GVT_MM_PPGTT) {
+		list_del(&mm->ppgtt_mm.list);
+
+		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
+		list_del(&mm->ppgtt_mm.lru_list);
+		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
+
+		invalidate_ppgtt_mm(mm);
+	} else {
+		vfree(mm->ggtt_mm.virtual_ggtt);
+		vfree(mm->ggtt_mm.host_ggtt_aperture);
+		vfree(mm->ggtt_mm.host_ggtt_hidden);
+	}
+
+	vgpu_free_mm(mm);
+}
+
+/**
+ * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
+ * @mm: a vGPU mm object
+ *
+ * This function is called when user doesn't want to use a vGPU mm object
+ */
+void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
+{
+	atomic_dec_if_positive(&mm->pincount);
+}
+
+/**
+ * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
+ * @mm: target vgpu mm
+ *
+ * This function is called when user wants to use a vGPU mm object. If this
+ * mm object hasn't been shadowed yet, the shadow will be populated at this
+ * time.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
+{
+	int ret;
+
+	atomic_inc(&mm->pincount);
+
+	if (mm->type == INTEL_GVT_MM_PPGTT) {
+		ret = shadow_ppgtt_mm(mm);
+		if (ret)
+			return ret;
+
+		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
+		list_move_tail(&mm->ppgtt_mm.lru_list,
+			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
+		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
+	}
+
+	return 0;
+}
+
+static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
+{
+	struct intel_vgpu_mm *mm;
+	struct list_head *pos, *n;
+
+	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
+
+	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
+
+		if (atomic_read(&mm->pincount))
+			continue;
+
+		list_del_init(&mm->ppgtt_mm.lru_list);
+		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
+		invalidate_ppgtt_mm(mm);
+		return 1;
+	}
+	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
+	return 0;
+}
+
+/*
+ * GMA translation APIs.
+ */
+static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
+		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
+{
+	struct intel_vgpu *vgpu = mm->vgpu;
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_vgpu_ppgtt_spt *s;
+
+	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
+	if (!s)
+		return -ENXIO;
+
+	if (!guest)
+		ppgtt_get_shadow_entry(s, e, index);
+	else
+		ppgtt_get_guest_entry(s, e, index);
+	return 0;
+}
+
+/**
+ * intel_vgpu_gma_to_gpa - translate a gma to GPA
+ * @mm: mm object. could be a PPGTT or GGTT mm object
+ * @gma: graphics memory address in this mm object
+ *
+ * This function is used to translate a graphics memory address in specific
+ * graphics memory space to guest physical address.
+ *
+ * Returns:
+ * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
+ */
+unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
+{
+	struct intel_vgpu *vgpu = mm->vgpu;
+	struct intel_gvt *gvt = vgpu->gvt;
+	const struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
+	const struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
+	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
+	unsigned long gma_index[4];
+	struct intel_gvt_gtt_entry e;
+	int i, levels = 0;
+	int ret;
+
+	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
+		   mm->type != INTEL_GVT_MM_PPGTT);
+
+	if (mm->type == INTEL_GVT_MM_GGTT) {
+		if (!vgpu_gmadr_is_valid(vgpu, gma))
+			goto err;
+
+		ggtt_get_guest_entry(mm, &e,
+			gma_ops->gma_to_ggtt_pte_index(gma));
+
+		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
+			+ (gma & ~I915_GTT_PAGE_MASK);
+
+		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
+	} else {
+		switch (mm->ppgtt_mm.root_entry_type) {
+		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
+			ppgtt_get_shadow_root_entry(mm, &e, 0);
+
+			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
+			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
+			gma_index[2] = gma_ops->gma_to_pde_index(gma);
+			gma_index[3] = gma_ops->gma_to_pte_index(gma);
+			levels = 4;
+			break;
+		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
+			ppgtt_get_shadow_root_entry(mm, &e,
+					gma_ops->gma_to_l3_pdp_index(gma));
+
+			gma_index[0] = gma_ops->gma_to_pde_index(gma);
+			gma_index[1] = gma_ops->gma_to_pte_index(gma);
+			levels = 2;
+			break;
+		default:
+			GEM_BUG_ON(1);
+		}
+
+		/* walk the shadow page table and get gpa from guest entry */
+		for (i = 0; i < levels; i++) {
+			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
+				(i == levels - 1));
+			if (ret)
+				goto err;
+
+			if (!pte_ops->test_present(&e)) {
+				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
+				goto err;
+			}
+		}
+
+		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
+					(gma & ~I915_GTT_PAGE_MASK);
+		trace_gma_translate(vgpu->id, "ppgtt", 0,
+				    mm->ppgtt_mm.root_entry_type, gma, gpa);
+	}
+
+	return gpa;
+err:
+	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
+	return INTEL_GVT_INVALID_ADDR;
+}
+
+static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
+	unsigned int off, void *p_data, unsigned int bytes)
+{
+	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
+	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
+	unsigned long index = off >> info->gtt_entry_size_shift;
+	unsigned long gma;
+	struct intel_gvt_gtt_entry e;
+
+	if (bytes != 4 && bytes != 8)
+		return -EINVAL;
+
+	gma = index << I915_GTT_PAGE_SHIFT;
+	if (!intel_gvt_ggtt_validate_range(vgpu,
+					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
+		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
+		memset(p_data, 0, bytes);
+		return 0;
+	}
+
+	ggtt_get_guest_entry(ggtt_mm, &e, index);
+	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
+			bytes);
+	return 0;
+}
+
+/**
+ * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
+ * @vgpu: a vGPU
+ * @off: register offset
+ * @p_data: data will be returned to guest
+ * @bytes: data length
+ *
+ * This function is used to emulate the GTT MMIO register read
+ *
+ * Returns:
+ * Zero on success, error code if failed.
+ */
+int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
+	void *p_data, unsigned int bytes)
+{
+	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
+	int ret;
+
+	if (bytes != 4 && bytes != 8)
+		return -EINVAL;
+
+	off -= info->gtt_start_offset;
+	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
+	return ret;
+}
+
+static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
+		struct intel_gvt_gtt_entry *entry)
+{
+	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
+	unsigned long pfn;
+
+	pfn = pte_ops->get_pfn(entry);
+	if (pfn != vgpu->gvt->gtt.scratch_mfn)
+		intel_gvt_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
+}
+
+static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
+	void *p_data, unsigned int bytes)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	const struct intel_gvt_device_info *info = &gvt->device_info;
+	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
+	const struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
+	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
+	unsigned long gma, gfn;
+	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
+	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
+	dma_addr_t dma_addr;
+	int ret;
+	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
+	bool partial_update = false;
+
+	if (bytes != 4 && bytes != 8)
+		return -EINVAL;
+
+	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
+
+	/* the VM may configure the whole GM space when ballooning is used */
+	if (!vgpu_gmadr_is_valid(vgpu, gma))
+		return 0;
+
+	e.type = GTT_TYPE_GGTT_PTE;
+	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
+			bytes);
+
+	/* If ggtt entry size is 8 bytes, and it's split into two 4 bytes
+	 * write, save the first 4 bytes in a list and update virtual
+	 * PTE. Only update shadow PTE when the second 4 bytes comes.
+	 */
+	if (bytes < info->gtt_entry_size) {
+		bool found = false;
+
+		list_for_each_entry_safe(pos, n,
+				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
+			if (g_gtt_index == pos->offset >>
+					info->gtt_entry_size_shift) {
+				if (off != pos->offset) {
+					/* the second partial part*/
+					int last_off = pos->offset &
+						(info->gtt_entry_size - 1);
+
+					memcpy((void *)&e.val64 + last_off,
+						(void *)&pos->data + last_off,
+						bytes);
+
+					list_del(&pos->list);
+					kfree(pos);
+					found = true;
+					break;
+				}
+
+				/* update of the first partial part */
+				pos->data = e.val64;
+				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
+				return 0;
+			}
+		}
+
+		if (!found) {
+			/* the first partial part */
+			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
+			if (!partial_pte)
+				return -ENOMEM;
+			partial_pte->offset = off;
+			partial_pte->data = e.val64;
+			list_add_tail(&partial_pte->list,
+				&ggtt_mm->ggtt_mm.partial_pte_list);
+			partial_update = true;
+		}
+	}
+
+	if (!partial_update && (ops->test_present(&e))) {
+		gfn = ops->get_pfn(&e);
+		m.val64 = e.val64;
+		m.type = e.type;
+
+		ret = intel_gvt_dma_map_guest_page(vgpu, gfn, PAGE_SIZE,
+						   &dma_addr);
+		if (ret) {
+			gvt_vgpu_err("fail to populate guest ggtt entry\n");
+			/* guest driver may read/write the entry when partial
+			 * update the entry in this situation p2m will fail
+			 * setting the shadow entry to point to a scratch page
+			 */
+			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
+		} else
+			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
+	} else {
+		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
+		ops->clear_present(&m);
+	}
+
+	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
+
+	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
+	ggtt_invalidate_pte(vgpu, &e);
+
+	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
+	ggtt_invalidate(gvt->gt);
+	return 0;
+}
+
+/*
+ * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
+ * @vgpu: a vGPU
+ * @off: register offset
+ * @p_data: data from guest write
+ * @bytes: data length
+ *
+ * This function is used to emulate the GTT MMIO register write
+ *
+ * Returns:
+ * Zero on success, error code if failed.
+ */
+int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int off, void *p_data, unsigned int bytes)
+{
+	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
+	int ret;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	int i;
+
+	if (bytes != 4 && bytes != 8)
+		return -EINVAL;
+
+	off -= info->gtt_start_offset;
+	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
+
+	/* if ggtt of last submitted context is written,
+	 * that context is probably got unpinned.
+	 * Set last shadowed ctx to invalid.
+	 */
+	for_each_engine(engine, vgpu->gvt->gt, i) {
+		if (!s->last_ctx[i].valid)
+			continue;
+
+		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
+			s->last_ctx[i].valid = false;
+	}
+	return ret;
+}
+
+static int alloc_scratch_pages(struct intel_vgpu *vgpu,
+		enum intel_gvt_gtt_type type)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
+	const struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
+	int page_entry_num = I915_GTT_PAGE_SIZE >>
+				vgpu->gvt->device_info.gtt_entry_size_shift;
+	void *scratch_pt;
+	int i;
+	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
+	dma_addr_t daddr;
+
+	if (drm_WARN_ON(&i915->drm,
+			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
+		return -EINVAL;
+
+	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!scratch_pt) {
+		gvt_vgpu_err("fail to allocate scratch page\n");
+		return -ENOMEM;
+	}
+
+	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0, 4096, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, daddr)) {
+		gvt_vgpu_err("fail to dmamap scratch_pt\n");
+		__free_page(virt_to_page(scratch_pt));
+		return -ENOMEM;
+	}
+	gtt->scratch_pt[type].page_mfn =
+		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
+	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
+	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
+			vgpu->id, type, gtt->scratch_pt[type].page_mfn);
+
+	/* Build the tree by full filled the scratch pt with the entries which
+	 * point to the next level scratch pt or scratch page. The
+	 * scratch_pt[type] indicate the scratch pt/scratch page used by the
+	 * 'type' pt.
+	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by
+	 * GTT_TYPE_PPGTT_PDE_PT level pt, that means this scratch_pt it self
+	 * is GTT_TYPE_PPGTT_PTE_PT, and full filled by scratch page mfn.
+	 */
+	if (type > GTT_TYPE_PPGTT_PTE_PT) {
+		struct intel_gvt_gtt_entry se;
+
+		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
+		se.type = get_entry_type(type - 1);
+		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
+
+		/* The entry parameters like present/writeable/cache type
+		 * set to the same as i915's scratch page tree.
+		 */
+		se.val64 |= GEN8_PAGE_PRESENT | GEN8_PAGE_RW;
+		if (type == GTT_TYPE_PPGTT_PDE_PT)
+			se.val64 |= PPAT_CACHED;
+
+		for (i = 0; i < page_entry_num; i++)
+			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
+	}
+
+	return 0;
+}
+
+static int release_scratch_page_tree(struct intel_vgpu *vgpu)
+{
+	int i;
+	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
+	dma_addr_t daddr;
+
+	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
+		if (vgpu->gtt.scratch_pt[i].page != NULL) {
+			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
+					I915_GTT_PAGE_SHIFT);
+			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
+			__free_page(vgpu->gtt.scratch_pt[i].page);
+			vgpu->gtt.scratch_pt[i].page = NULL;
+			vgpu->gtt.scratch_pt[i].page_mfn = 0;
+		}
+	}
+
+	return 0;
+}
+
+static int create_scratch_page_tree(struct intel_vgpu *vgpu)
+{
+	int i, ret;
+
+	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
+		ret = alloc_scratch_pages(vgpu, i);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+
+err:
+	release_scratch_page_tree(vgpu);
+	return ret;
+}
+
+/**
+ * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virulization
+ * @vgpu: a vGPU
+ *
+ * This function is used to initialize per-vGPU graphics memory virtualization
+ * components.
+ *
+ * Returns:
+ * Zero on success, error code if failed.
+ */
+int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
+
+	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
+
+	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
+	INIT_LIST_HEAD(&gtt->oos_page_list_head);
+	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
+
+	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
+	if (IS_ERR(gtt->ggtt_mm)) {
+		gvt_vgpu_err("fail to create mm for ggtt.\n");
+		return PTR_ERR(gtt->ggtt_mm);
+	}
+
+	intel_vgpu_reset_ggtt(vgpu, false);
+
+	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
+
+	return create_scratch_page_tree(vgpu);
+}
+
+void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
+{
+	struct list_head *pos, *n;
+	struct intel_vgpu_mm *mm;
+
+	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
+		intel_vgpu_destroy_mm(mm);
+	}
+
+	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
+		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
+
+	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
+		gvt_err("Why we still has spt not freed?\n");
+		ppgtt_free_all_spt(vgpu);
+	}
+}
+
+static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt_partial_pte *pos, *next;
+
+	list_for_each_entry_safe(pos, next,
+				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
+				 list) {
+		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
+			pos->offset, pos->data);
+		kfree(pos);
+	}
+	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
+	vgpu->gtt.ggtt_mm = NULL;
+}
+
+/**
+ * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virulization
+ * @vgpu: a vGPU
+ *
+ * This function is used to clean up per-vGPU graphics memory virtualization
+ * components.
+ *
+ * Returns:
+ * Zero on success, error code if failed.
+ */
+void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
+{
+	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
+	intel_vgpu_destroy_ggtt_mm(vgpu);
+	release_scratch_page_tree(vgpu);
+}
+
+static void clean_spt_oos(struct intel_gvt *gvt)
+{
+	struct intel_gvt_gtt *gtt = &gvt->gtt;
+	struct list_head *pos, *n;
+	struct intel_vgpu_oos_page *oos_page;
+
+	WARN(!list_empty(&gtt->oos_page_use_list_head),
+		"someone is still using oos page\n");
+
+	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
+		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
+		list_del(&oos_page->list);
+		free_page((unsigned long)oos_page->mem);
+		kfree(oos_page);
+	}
+}
+
+static int setup_spt_oos(struct intel_gvt *gvt)
+{
+	struct intel_gvt_gtt *gtt = &gvt->gtt;
+	struct intel_vgpu_oos_page *oos_page;
+	int i;
+	int ret;
+
+	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
+	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
+
+	for (i = 0; i < preallocated_oos_pages; i++) {
+		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
+		if (!oos_page) {
+			ret = -ENOMEM;
+			goto fail;
+		}
+		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
+		if (!oos_page->mem) {
+			ret = -ENOMEM;
+			kfree(oos_page);
+			goto fail;
+		}
+
+		INIT_LIST_HEAD(&oos_page->list);
+		INIT_LIST_HEAD(&oos_page->vm_list);
+		oos_page->id = i;
+		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
+	}
+
+	gvt_dbg_mm("%d oos pages preallocated\n", i);
+
+	return 0;
+fail:
+	clean_spt_oos(gvt);
+	return ret;
+}
+
+/**
+ * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
+ * @vgpu: a vGPU
+ * @pdps: pdp root array
+ *
+ * This function is used to find a PPGTT mm object from mm object pool
+ *
+ * Returns:
+ * pointer to mm object on success, NULL if failed.
+ */
+struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
+		u64 pdps[])
+{
+	struct intel_vgpu_mm *mm;
+	struct list_head *pos;
+
+	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
+
+		switch (mm->ppgtt_mm.root_entry_type) {
+		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
+			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
+				return mm;
+			break;
+		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
+			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
+				    sizeof(mm->ppgtt_mm.guest_pdps)))
+				return mm;
+			break;
+		default:
+			GEM_BUG_ON(1);
+		}
+	}
+	return NULL;
+}
+
+/**
+ * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
+ * @vgpu: a vGPU
+ * @root_entry_type: ppgtt root entry type
+ * @pdps: guest pdps
+ *
+ * This function is used to find or create a PPGTT mm object from a guest.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
+{
+	struct intel_vgpu_mm *mm;
+
+	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
+	if (mm) {
+		intel_vgpu_mm_get(mm);
+	} else {
+		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
+		if (IS_ERR(mm))
+			gvt_vgpu_err("fail to create mm\n");
+	}
+	return mm;
+}
+
+/**
+ * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
+ * @vgpu: a vGPU
+ * @pdps: guest pdps
+ *
+ * This function is used to find a PPGTT mm object from a guest and destroy it.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
+{
+	struct intel_vgpu_mm *mm;
+
+	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
+	if (!mm) {
+		gvt_vgpu_err("fail to find ppgtt instance.\n");
+		return -EINVAL;
+	}
+	intel_vgpu_mm_put(mm);
+	return 0;
+}
+
+/**
+ * intel_gvt_init_gtt - initialize mm components of a GVT device
+ * @gvt: GVT device
+ *
+ * This function is called at the initialization stage, to initialize
+ * the mm components of a GVT device.
+ *
+ * Returns:
+ * zero on success, negative error code if failed.
+ */
+int intel_gvt_init_gtt(struct intel_gvt *gvt)
+{
+	int ret;
+	void *page;
+	struct device *dev = gvt->gt->i915->drm.dev;
+	dma_addr_t daddr;
+
+	gvt_dbg_core("init gtt\n");
+
+	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
+	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
+
+	page = (void *)get_zeroed_page(GFP_KERNEL);
+	if (!page) {
+		gvt_err("fail to allocate scratch ggtt page\n");
+		return -ENOMEM;
+	}
+
+	daddr = dma_map_page(dev, virt_to_page(page), 0,
+			4096, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, daddr)) {
+		gvt_err("fail to dmamap scratch ggtt page\n");
+		__free_page(virt_to_page(page));
+		return -ENOMEM;
+	}
+
+	gvt->gtt.scratch_page = virt_to_page(page);
+	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
+
+	if (enable_out_of_sync) {
+		ret = setup_spt_oos(gvt);
+		if (ret) {
+			gvt_err("fail to initialize SPT oos\n");
+			dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
+			__free_page(gvt->gtt.scratch_page);
+			return ret;
+		}
+	}
+	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
+	mutex_init(&gvt->gtt.ppgtt_mm_lock);
+	return 0;
+}
+
+/**
+ * intel_gvt_clean_gtt - clean up mm components of a GVT device
+ * @gvt: GVT device
+ *
+ * This function is called at the driver unloading stage, to clean up
+ * the mm components of a GVT device.
+ *
+ */
+void intel_gvt_clean_gtt(struct intel_gvt *gvt)
+{
+	struct device *dev = gvt->gt->i915->drm.dev;
+	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
+					I915_GTT_PAGE_SHIFT);
+
+	dma_unmap_page(dev, daddr, 4096, DMA_BIDIRECTIONAL);
+
+	__free_page(gvt->gtt.scratch_page);
+
+	if (enable_out_of_sync)
+		clean_spt_oos(gvt);
+}
+
+/**
+ * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
+ * @vgpu: a vGPU
+ *
+ * This function is called when invalidate all PPGTT instances of a vGPU.
+ *
+ */
+void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
+{
+	struct list_head *pos, *n;
+	struct intel_vgpu_mm *mm;
+
+	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
+		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
+		if (mm->type == INTEL_GVT_MM_PPGTT) {
+			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
+			list_del_init(&mm->ppgtt_mm.lru_list);
+			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
+			if (mm->ppgtt_mm.shadowed)
+				invalidate_ppgtt_mm(mm);
+		}
+	}
+}
+
+/**
+ * intel_vgpu_reset_ggtt - reset the GGTT entry
+ * @vgpu: a vGPU
+ * @invalidate_old: invalidate old entries
+ *
+ * This function is called at the vGPU create stage
+ * to reset all the GGTT entries.
+ *
+ */
+void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	const struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
+	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
+	struct intel_gvt_gtt_entry old_entry;
+	u32 index;
+	u32 num_entries;
+
+	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
+	pte_ops->set_present(&entry);
+
+	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
+	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
+	while (num_entries--) {
+		if (invalidate_old) {
+			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
+			ggtt_invalidate_pte(vgpu, &old_entry);
+		}
+		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
+	}
+
+	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
+	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
+	while (num_entries--) {
+		if (invalidate_old) {
+			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
+			ggtt_invalidate_pte(vgpu, &old_entry);
+		}
+		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
+	}
+
+	ggtt_invalidate(gvt->gt);
+}
+
+/**
+ * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
+ * @gvt: intel gvt device
+ *
+ * This function is called at driver resume stage to restore
+ * GGTT entries of every vGPU.
+ *
+ */
+void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
+{
+	struct intel_vgpu *vgpu;
+	struct intel_vgpu_mm *mm;
+	int id;
+	gen8_pte_t pte;
+	u32 idx, num_low, num_hi, offset;
+
+	/* Restore dirty host ggtt for all vGPUs */
+	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
+		mm = vgpu->gtt.ggtt_mm;
+
+		num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
+		offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
+		for (idx = 0; idx < num_low; idx++) {
+			pte = mm->ggtt_mm.host_ggtt_aperture[idx];
+			if (pte & GEN8_PAGE_PRESENT)
+				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
+		}
+
+		num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
+		offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
+		for (idx = 0; idx < num_hi; idx++) {
+			pte = mm->ggtt_mm.host_ggtt_hidden[idx];
+			if (pte & GEN8_PAGE_PRESENT)
+				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h
new file mode 100644
index 0000000000..4cb183e06e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/gtt.h
@@ -0,0 +1,296 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *    Zhenyu Wang <zhenyuw@linux.intel.com>
+ *    Xiao Zheng <xiao.zheng@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *
+ */
+
+#ifndef _GVT_GTT_H_
+#define _GVT_GTT_H_
+
+#include <linux/kernel.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/radix-tree.h>
+
+#include "gt/intel_gtt.h"
+
+struct intel_gvt;
+struct intel_vgpu;
+struct intel_vgpu_mm;
+
+#define I915_GTT_PAGE_SHIFT         12
+
+#define INTEL_GVT_INVALID_ADDR (~0UL)
+
+struct intel_gvt_gtt_entry {
+	u64 val64;
+	int type;
+};
+
+struct intel_gvt_gtt_pte_ops {
+	int (*get_entry)(void *pt,
+			 struct intel_gvt_gtt_entry *e,
+			 unsigned long index,
+			 bool hypervisor_access,
+			 unsigned long gpa,
+			 struct intel_vgpu *vgpu);
+	int (*set_entry)(void *pt,
+			 struct intel_gvt_gtt_entry *e,
+			 unsigned long index,
+			 bool hypervisor_access,
+			 unsigned long gpa,
+			 struct intel_vgpu *vgpu);
+	bool (*test_present)(struct intel_gvt_gtt_entry *e);
+	void (*clear_present)(struct intel_gvt_gtt_entry *e);
+	void (*set_present)(struct intel_gvt_gtt_entry *e);
+	bool (*test_pse)(struct intel_gvt_gtt_entry *e);
+	void (*clear_pse)(struct intel_gvt_gtt_entry *e);
+	bool (*test_ips)(struct intel_gvt_gtt_entry *e);
+	void (*clear_ips)(struct intel_gvt_gtt_entry *e);
+	bool (*test_64k_splited)(struct intel_gvt_gtt_entry *e);
+	void (*clear_64k_splited)(struct intel_gvt_gtt_entry *e);
+	void (*set_64k_splited)(struct intel_gvt_gtt_entry *e);
+	void (*set_pfn)(struct intel_gvt_gtt_entry *e, unsigned long pfn);
+	unsigned long (*get_pfn)(struct intel_gvt_gtt_entry *e);
+};
+
+struct intel_gvt_gtt_gma_ops {
+	unsigned long (*gma_to_ggtt_pte_index)(unsigned long gma);
+	unsigned long (*gma_to_pte_index)(unsigned long gma);
+	unsigned long (*gma_to_pde_index)(unsigned long gma);
+	unsigned long (*gma_to_l3_pdp_index)(unsigned long gma);
+	unsigned long (*gma_to_l4_pdp_index)(unsigned long gma);
+	unsigned long (*gma_to_pml4_index)(unsigned long gma);
+};
+
+struct intel_gvt_gtt {
+	const struct intel_gvt_gtt_pte_ops *pte_ops;
+	const struct intel_gvt_gtt_gma_ops *gma_ops;
+	int (*mm_alloc_page_table)(struct intel_vgpu_mm *mm);
+	void (*mm_free_page_table)(struct intel_vgpu_mm *mm);
+	struct list_head oos_page_use_list_head;
+	struct list_head oos_page_free_list_head;
+	struct mutex ppgtt_mm_lock;
+	struct list_head ppgtt_mm_lru_list_head;
+
+	struct page *scratch_page;
+	unsigned long scratch_mfn;
+};
+
+enum intel_gvt_gtt_type {
+	GTT_TYPE_INVALID = 0,
+
+	GTT_TYPE_GGTT_PTE,
+
+	GTT_TYPE_PPGTT_PTE_4K_ENTRY,
+	GTT_TYPE_PPGTT_PTE_64K_ENTRY,
+	GTT_TYPE_PPGTT_PTE_2M_ENTRY,
+	GTT_TYPE_PPGTT_PTE_1G_ENTRY,
+
+	GTT_TYPE_PPGTT_PTE_ENTRY,
+
+	GTT_TYPE_PPGTT_PDE_ENTRY,
+	GTT_TYPE_PPGTT_PDP_ENTRY,
+	GTT_TYPE_PPGTT_PML4_ENTRY,
+
+	GTT_TYPE_PPGTT_ROOT_ENTRY,
+
+	GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
+	GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
+
+	GTT_TYPE_PPGTT_ENTRY,
+
+	GTT_TYPE_PPGTT_PTE_PT,
+	GTT_TYPE_PPGTT_PDE_PT,
+	GTT_TYPE_PPGTT_PDP_PT,
+	GTT_TYPE_PPGTT_PML4_PT,
+
+	GTT_TYPE_MAX,
+};
+
+enum intel_gvt_mm_type {
+	INTEL_GVT_MM_GGTT,
+	INTEL_GVT_MM_PPGTT,
+};
+
+#define GVT_RING_CTX_NR_PDPS	GEN8_3LVL_PDPES
+
+struct intel_gvt_partial_pte {
+	unsigned long offset;
+	u64 data;
+	struct list_head list;
+};
+
+struct intel_vgpu_mm {
+	enum intel_gvt_mm_type type;
+	struct intel_vgpu *vgpu;
+
+	struct kref ref;
+	atomic_t pincount;
+
+	union {
+		struct {
+			enum intel_gvt_gtt_type root_entry_type;
+			/*
+			 * The 4 PDPs in ring context. For 48bit addressing,
+			 * only PDP0 is valid and point to PML4. For 32it
+			 * addressing, all 4 are used as true PDPs.
+			 */
+			u64 guest_pdps[GVT_RING_CTX_NR_PDPS];
+			u64 shadow_pdps[GVT_RING_CTX_NR_PDPS];
+			bool shadowed;
+
+			struct list_head list;
+			struct list_head lru_list;
+			struct list_head link; /* possible LRI shadow mm list */
+		} ppgtt_mm;
+		struct {
+			void *virtual_ggtt;
+			/* Save/restore for PM */
+			u64 *host_ggtt_aperture;
+			u64 *host_ggtt_hidden;
+			struct list_head partial_pte_list;
+		} ggtt_mm;
+	};
+};
+
+struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
+
+static inline void intel_vgpu_mm_get(struct intel_vgpu_mm *mm)
+{
+	kref_get(&mm->ref);
+}
+
+void _intel_vgpu_mm_release(struct kref *mm_ref);
+
+static inline void intel_vgpu_mm_put(struct intel_vgpu_mm *mm)
+{
+	kref_put(&mm->ref, _intel_vgpu_mm_release);
+}
+
+static inline void intel_vgpu_destroy_mm(struct intel_vgpu_mm *mm)
+{
+	intel_vgpu_mm_put(mm);
+}
+
+struct intel_vgpu_guest_page;
+
+struct intel_vgpu_scratch_pt {
+	struct page *page;
+	unsigned long page_mfn;
+};
+
+struct intel_vgpu_gtt {
+	struct intel_vgpu_mm *ggtt_mm;
+	unsigned long active_ppgtt_mm_bitmap;
+	struct list_head ppgtt_mm_list_head;
+	struct radix_tree_root spt_tree;
+	struct list_head oos_page_list_head;
+	struct list_head post_shadow_list_head;
+	struct intel_vgpu_scratch_pt scratch_pt[GTT_TYPE_MAX];
+};
+
+int intel_vgpu_init_gtt(struct intel_vgpu *vgpu);
+void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu);
+void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old);
+void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu);
+
+int intel_gvt_init_gtt(struct intel_gvt *gvt);
+void intel_gvt_clean_gtt(struct intel_gvt *gvt);
+
+struct intel_vgpu_mm *intel_gvt_find_ppgtt_mm(struct intel_vgpu *vgpu,
+					      int page_table_level,
+					      void *root_entry);
+
+struct intel_vgpu_oos_page {
+	struct intel_vgpu_ppgtt_spt *spt;
+	struct list_head list;
+	struct list_head vm_list;
+	int id;
+	void *mem;
+};
+
+#define GTT_ENTRY_NUM_IN_ONE_PAGE 512
+
+/* Represent a vgpu shadow page table. */
+struct intel_vgpu_ppgtt_spt {
+	atomic_t refcount;
+	struct intel_vgpu *vgpu;
+
+	struct {
+		enum intel_gvt_gtt_type type;
+		bool pde_ips; /* for 64KB PTEs */
+		void *vaddr;
+		struct page *page;
+		unsigned long mfn;
+	} shadow_page;
+
+	struct {
+		enum intel_gvt_gtt_type type;
+		bool pde_ips; /* for 64KB PTEs */
+		unsigned long gfn;
+		unsigned long write_cnt;
+		struct intel_vgpu_oos_page *oos_page;
+	} guest_page;
+
+	DECLARE_BITMAP(post_shadow_bitmap, GTT_ENTRY_NUM_IN_ONE_PAGE);
+	struct list_head post_shadow_list;
+};
+
+int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu);
+
+int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu);
+
+int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm);
+
+void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm);
+
+unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm,
+		unsigned long gma);
+
+struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
+		u64 pdps[]);
+
+struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
+		enum intel_gvt_gtt_type root_entry_type, u64 pdps[]);
+
+int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[]);
+
+int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
+	unsigned int off, void *p_data, unsigned int bytes);
+
+int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
+	unsigned int off, void *p_data, unsigned int bytes);
+
+void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu);
+void intel_gvt_restore_ggtt(struct intel_gvt *gvt);
+
+#endif /* _GVT_GTT_H_ */
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
new file mode 100644
index 0000000000..53a0a42a50
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -0,0 +1,771 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Eddie Dong <eddie.dong@intel.com>
+ *
+ * Contributors:
+ *    Niu Bing <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#ifndef _GVT_H_
+#define _GVT_H_
+
+#include <uapi/linux/pci_regs.h>
+#include <linux/vfio.h>
+#include <linux/mdev.h>
+
+#include <asm/kvm_page_track.h>
+
+#include "i915_drv.h"
+#include "intel_gvt.h"
+
+#include "debug.h"
+#include "mmio.h"
+#include "reg.h"
+#include "interrupt.h"
+#include "gtt.h"
+#include "display.h"
+#include "edid.h"
+#include "execlist.h"
+#include "scheduler.h"
+#include "sched_policy.h"
+#include "mmio_context.h"
+#include "cmd_parser.h"
+#include "fb_decoder.h"
+#include "dmabuf.h"
+#include "page_track.h"
+
+#define GVT_MAX_VGPU 8
+
+/* Describe per-platform limitations. */
+struct intel_gvt_device_info {
+	u32 max_support_vgpus;
+	u32 cfg_space_size;
+	u32 mmio_size;
+	u32 mmio_bar;
+	unsigned long msi_cap_offset;
+	u32 gtt_start_offset;
+	u32 gtt_entry_size;
+	u32 gtt_entry_size_shift;
+	int gmadr_bytes_in_cmd;
+	u32 max_surface_size;
+};
+
+/* GM resources owned by a vGPU */
+struct intel_vgpu_gm {
+	u64 aperture_sz;
+	u64 hidden_sz;
+	struct drm_mm_node low_gm_node;
+	struct drm_mm_node high_gm_node;
+};
+
+#define INTEL_GVT_MAX_NUM_FENCES 32
+
+/* Fences owned by a vGPU */
+struct intel_vgpu_fence {
+	struct i915_fence_reg *regs[INTEL_GVT_MAX_NUM_FENCES];
+	u32 base;
+	u32 size;
+};
+
+struct intel_vgpu_mmio {
+	void *vreg;
+};
+
+#define INTEL_GVT_MAX_BAR_NUM 4
+
+struct intel_vgpu_pci_bar {
+	u64 size;
+	bool tracked;
+};
+
+struct intel_vgpu_cfg_space {
+	unsigned char virtual_cfg_space[PCI_CFG_SPACE_EXP_SIZE];
+	struct intel_vgpu_pci_bar bar[INTEL_GVT_MAX_BAR_NUM];
+	u32 pmcsr_off;
+};
+
+#define vgpu_cfg_space(vgpu) ((vgpu)->cfg_space.virtual_cfg_space)
+
+struct intel_vgpu_irq {
+	bool irq_warn_once[INTEL_GVT_EVENT_MAX];
+	DECLARE_BITMAP(flip_done_event[I915_MAX_PIPES],
+		       INTEL_GVT_EVENT_MAX);
+};
+
+struct intel_vgpu_opregion {
+	bool mapped;
+	void *va;
+	u32 gfn[INTEL_GVT_OPREGION_PAGES];
+};
+
+#define vgpu_opregion(vgpu) (&(vgpu->opregion))
+
+struct intel_vgpu_display {
+	struct intel_vgpu_i2c_edid i2c_edid;
+	struct intel_vgpu_port ports[I915_MAX_PORTS];
+	struct intel_vgpu_sbi sbi;
+	enum port port_num;
+};
+
+struct vgpu_sched_ctl {
+	int weight;
+};
+
+enum {
+	INTEL_VGPU_EXECLIST_SUBMISSION = 1,
+	INTEL_VGPU_GUC_SUBMISSION,
+};
+
+struct intel_vgpu_submission_ops {
+	const char *name;
+	int (*init)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask);
+	void (*clean)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask);
+	void (*reset)(struct intel_vgpu *vgpu, intel_engine_mask_t engine_mask);
+};
+
+struct intel_vgpu_submission {
+	struct intel_vgpu_execlist execlist[I915_NUM_ENGINES];
+	struct list_head workload_q_head[I915_NUM_ENGINES];
+	struct intel_context *shadow[I915_NUM_ENGINES];
+	struct kmem_cache *workloads;
+	atomic_t running_workload_num;
+	union {
+		u64 i915_context_pml4;
+		u64 i915_context_pdps[GEN8_3LVL_PDPES];
+	};
+	DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
+	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
+	void *ring_scan_buffer[I915_NUM_ENGINES];
+	int ring_scan_buffer_size[I915_NUM_ENGINES];
+	const struct intel_vgpu_submission_ops *ops;
+	int virtual_submission_interface;
+	bool active;
+	struct {
+		u32 lrca;
+		bool valid;
+		u64 ring_context_gpa;
+	} last_ctx[I915_NUM_ENGINES];
+};
+
+#define KVMGT_DEBUGFS_FILENAME		"kvmgt_nr_cache_entries"
+
+enum {
+	INTEL_VGPU_STATUS_ATTACHED = 0,
+	INTEL_VGPU_STATUS_ACTIVE,
+	INTEL_VGPU_STATUS_NR_BITS,
+};
+
+struct intel_vgpu {
+	struct vfio_device vfio_device;
+	struct intel_gvt *gvt;
+	struct mutex vgpu_lock;
+	int id;
+	DECLARE_BITMAP(status, INTEL_VGPU_STATUS_NR_BITS);
+	bool pv_notified;
+	bool failsafe;
+	unsigned int resetting_eng;
+
+	/* Both sched_data and sched_ctl can be seen a part of the global gvt
+	 * scheduler structure. So below 2 vgpu data are protected
+	 * by sched_lock, not vgpu_lock.
+	 */
+	void *sched_data;
+	struct vgpu_sched_ctl sched_ctl;
+
+	struct intel_vgpu_fence fence;
+	struct intel_vgpu_gm gm;
+	struct intel_vgpu_cfg_space cfg_space;
+	struct intel_vgpu_mmio mmio;
+	struct intel_vgpu_irq irq;
+	struct intel_vgpu_gtt gtt;
+	struct intel_vgpu_opregion opregion;
+	struct intel_vgpu_display display;
+	struct intel_vgpu_submission submission;
+	struct radix_tree_root page_track_tree;
+	u32 hws_pga[I915_NUM_ENGINES];
+	/* Set on PCI_D3, reset on DMLR, not reflecting the actual PM state */
+	bool d3_entered;
+
+	struct dentry *debugfs;
+
+	struct list_head dmabuf_obj_list_head;
+	struct mutex dmabuf_lock;
+	struct idr object_idr;
+	struct intel_vgpu_vblank_timer vblank_timer;
+
+	u32 scan_nonprivbb;
+
+	struct vfio_region *region;
+	int num_regions;
+	struct eventfd_ctx *intx_trigger;
+	struct eventfd_ctx *msi_trigger;
+
+	/*
+	 * Two caches are used to avoid mapping duplicated pages (eg.
+	 * scratch pages). This help to reduce dma setup overhead.
+	 */
+	struct rb_root gfn_cache;
+	struct rb_root dma_addr_cache;
+	unsigned long nr_cache_entries;
+	struct mutex cache_lock;
+
+	struct kvm_page_track_notifier_node track_node;
+#define NR_BKT (1 << 18)
+	struct hlist_head ptable[NR_BKT];
+#undef NR_BKT
+};
+
+/* validating GM healthy status*/
+#define vgpu_is_vm_unhealthy(ret_val) \
+	(((ret_val) == -EBADRQC) || ((ret_val) == -EFAULT))
+
+struct intel_gvt_gm {
+	unsigned long vgpu_allocated_low_gm_size;
+	unsigned long vgpu_allocated_high_gm_size;
+};
+
+struct intel_gvt_fence {
+	unsigned long vgpu_allocated_fence_num;
+};
+
+/* Special MMIO blocks. */
+struct gvt_mmio_block {
+	unsigned int device;
+	i915_reg_t   offset;
+	unsigned int size;
+	gvt_mmio_func read;
+	gvt_mmio_func write;
+};
+
+#define INTEL_GVT_MMIO_HASH_BITS 11
+
+struct intel_gvt_mmio {
+	u16 *mmio_attribute;
+/* Register contains RO bits */
+#define F_RO		(1 << 0)
+/* Register contains graphics address */
+#define F_GMADR		(1 << 1)
+/* Mode mask registers with high 16 bits as the mask bits */
+#define F_MODE_MASK	(1 << 2)
+/* This reg can be accessed by GPU commands */
+#define F_CMD_ACCESS	(1 << 3)
+/* This reg has been accessed by a VM */
+#define F_ACCESSED	(1 << 4)
+/* This reg requires save & restore during host PM suspend/resume */
+#define F_PM_SAVE	(1 << 5)
+/* This reg could be accessed by unaligned address */
+#define F_UNALIGN	(1 << 6)
+/* This reg is in GVT's mmio save-restor list and in hardware
+ * logical context image
+ */
+#define F_SR_IN_CTX	(1 << 7)
+/* Value of command write of this reg needs to be patched */
+#define F_CMD_WRITE_PATCH	(1 << 8)
+
+	struct gvt_mmio_block *mmio_block;
+	unsigned int num_mmio_block;
+
+	DECLARE_HASHTABLE(mmio_info_table, INTEL_GVT_MMIO_HASH_BITS);
+	unsigned long num_tracked_mmio;
+};
+
+struct intel_gvt_firmware {
+	void *cfg_space;
+	void *mmio;
+	bool firmware_loaded;
+};
+
+struct intel_vgpu_config {
+	unsigned int low_mm;
+	unsigned int high_mm;
+	unsigned int fence;
+
+	/*
+	 * A vGPU with a weight of 8 will get twice as much GPU as a vGPU with
+	 * a weight of 4 on a contended host, different vGPU type has different
+	 * weight set. Legal weights range from 1 to 16.
+	 */
+	unsigned int weight;
+	enum intel_vgpu_edid edid;
+	const char *name;
+};
+
+struct intel_vgpu_type {
+	struct mdev_type type;
+	char name[16];
+	const struct intel_vgpu_config *conf;
+};
+
+struct intel_gvt {
+	/* GVT scope lock, protect GVT itself, and all resource currently
+	 * not yet protected by special locks(vgpu and scheduler lock).
+	 */
+	struct mutex lock;
+	/* scheduler scope lock, protect gvt and vgpu schedule related data */
+	struct mutex sched_lock;
+
+	struct intel_gt *gt;
+	struct idr vgpu_idr;	/* vGPU IDR pool */
+
+	struct intel_gvt_device_info device_info;
+	struct intel_gvt_gm gm;
+	struct intel_gvt_fence fence;
+	struct intel_gvt_mmio mmio;
+	struct intel_gvt_firmware firmware;
+	struct intel_gvt_irq irq;
+	struct intel_gvt_gtt gtt;
+	struct intel_gvt_workload_scheduler scheduler;
+	struct notifier_block shadow_ctx_notifier_block[I915_NUM_ENGINES];
+	DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS);
+	struct mdev_parent parent;
+	struct mdev_type **mdev_types;
+	struct intel_vgpu_type *types;
+	unsigned int num_types;
+	struct intel_vgpu *idle_vgpu;
+
+	struct task_struct *service_thread;
+	wait_queue_head_t service_thread_wq;
+
+	/* service_request is always used in bit operation, we should always
+	 * use it with atomic bit ops so that no need to use gvt big lock.
+	 */
+	unsigned long service_request;
+
+	struct {
+		struct engine_mmio *mmio;
+		int ctx_mmio_count[I915_NUM_ENGINES];
+		u32 *tlb_mmio_offset_list;
+		u32 tlb_mmio_offset_list_cnt;
+		u32 *mocs_mmio_offset_list;
+		u32 mocs_mmio_offset_list_cnt;
+	} engine_mmio_list;
+	bool is_reg_whitelist_updated;
+
+	struct dentry *debugfs_root;
+};
+
+static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915)
+{
+	return i915->gvt;
+}
+
+enum {
+	/* Scheduling trigger by timer */
+	INTEL_GVT_REQUEST_SCHED = 0,
+
+	/* Scheduling trigger by event */
+	INTEL_GVT_REQUEST_EVENT_SCHED = 1,
+
+	/* per-vGPU vblank emulation request */
+	INTEL_GVT_REQUEST_EMULATE_VBLANK = 2,
+	INTEL_GVT_REQUEST_EMULATE_VBLANK_MAX = INTEL_GVT_REQUEST_EMULATE_VBLANK
+		+ GVT_MAX_VGPU,
+};
+
+static inline void intel_gvt_request_service(struct intel_gvt *gvt,
+		int service)
+{
+	set_bit(service, (void *)&gvt->service_request);
+	wake_up(&gvt->service_thread_wq);
+}
+
+void intel_gvt_free_firmware(struct intel_gvt *gvt);
+int intel_gvt_load_firmware(struct intel_gvt *gvt);
+
+/* Aperture/GM space definitions for GVT device */
+#define MB_TO_BYTES(mb) ((mb) << 20ULL)
+#define BYTES_TO_MB(b) ((b) >> 20ULL)
+
+#define HOST_LOW_GM_SIZE MB_TO_BYTES(128)
+#define HOST_HIGH_GM_SIZE MB_TO_BYTES(384)
+#define HOST_FENCE 4
+
+#define gvt_to_ggtt(gvt)	((gvt)->gt->ggtt)
+
+/* Aperture/GM space definitions for GVT device */
+#define gvt_aperture_sz(gvt)	  gvt_to_ggtt(gvt)->mappable_end
+#define gvt_aperture_pa_base(gvt) gvt_to_ggtt(gvt)->gmadr.start
+
+#define gvt_ggtt_gm_sz(gvt)	gvt_to_ggtt(gvt)->vm.total
+#define gvt_ggtt_sz(gvt)	(gvt_to_ggtt(gvt)->vm.total >> PAGE_SHIFT << 3)
+#define gvt_hidden_sz(gvt)	(gvt_ggtt_gm_sz(gvt) - gvt_aperture_sz(gvt))
+
+#define gvt_aperture_gmadr_base(gvt) (0)
+#define gvt_aperture_gmadr_end(gvt) (gvt_aperture_gmadr_base(gvt) \
+				     + gvt_aperture_sz(gvt) - 1)
+
+#define gvt_hidden_gmadr_base(gvt) (gvt_aperture_gmadr_base(gvt) \
+				    + gvt_aperture_sz(gvt))
+#define gvt_hidden_gmadr_end(gvt) (gvt_hidden_gmadr_base(gvt) \
+				   + gvt_hidden_sz(gvt) - 1)
+
+#define gvt_fence_sz(gvt) (gvt_to_ggtt(gvt)->num_fences)
+
+/* Aperture/GM space definitions for vGPU */
+#define vgpu_aperture_offset(vgpu)	((vgpu)->gm.low_gm_node.start)
+#define vgpu_hidden_offset(vgpu)	((vgpu)->gm.high_gm_node.start)
+#define vgpu_aperture_sz(vgpu)		((vgpu)->gm.aperture_sz)
+#define vgpu_hidden_sz(vgpu)		((vgpu)->gm.hidden_sz)
+
+#define vgpu_aperture_pa_base(vgpu) \
+	(gvt_aperture_pa_base(vgpu->gvt) + vgpu_aperture_offset(vgpu))
+
+#define vgpu_ggtt_gm_sz(vgpu) ((vgpu)->gm.aperture_sz + (vgpu)->gm.hidden_sz)
+
+#define vgpu_aperture_pa_end(vgpu) \
+	(vgpu_aperture_pa_base(vgpu) + vgpu_aperture_sz(vgpu) - 1)
+
+#define vgpu_aperture_gmadr_base(vgpu) (vgpu_aperture_offset(vgpu))
+#define vgpu_aperture_gmadr_end(vgpu) \
+	(vgpu_aperture_gmadr_base(vgpu) + vgpu_aperture_sz(vgpu) - 1)
+
+#define vgpu_hidden_gmadr_base(vgpu) (vgpu_hidden_offset(vgpu))
+#define vgpu_hidden_gmadr_end(vgpu) \
+	(vgpu_hidden_gmadr_base(vgpu) + vgpu_hidden_sz(vgpu) - 1)
+
+#define vgpu_fence_base(vgpu) (vgpu->fence.base)
+#define vgpu_fence_sz(vgpu) (vgpu->fence.size)
+
+/* ring context size i.e. the first 0x50 dwords*/
+#define RING_CTX_SIZE 320
+
+int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu,
+			      const struct intel_vgpu_config *conf);
+void intel_vgpu_reset_resource(struct intel_vgpu *vgpu);
+void intel_vgpu_free_resource(struct intel_vgpu *vgpu);
+void intel_vgpu_write_fence(struct intel_vgpu *vgpu,
+	u32 fence, u64 value);
+
+/* Macros for easily accessing vGPU virtual/shadow register.
+   Explicitly seperate use for typed MMIO reg or real offset.*/
+#define vgpu_vreg_t(vgpu, reg) \
+	(*(u32 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg)))
+#define vgpu_vreg(vgpu, offset) \
+	(*(u32 *)(vgpu->mmio.vreg + (offset)))
+#define vgpu_vreg64_t(vgpu, reg) \
+	(*(u64 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg)))
+#define vgpu_vreg64(vgpu, offset) \
+	(*(u64 *)(vgpu->mmio.vreg + (offset)))
+
+#define for_each_active_vgpu(gvt, vgpu, id) \
+	idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) \
+		for_each_if(test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
+
+static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu,
+					    u32 offset, u32 val, bool low)
+{
+	u32 *pval;
+
+	/* BAR offset should be 32 bits algiend */
+	offset = rounddown(offset, 4);
+	pval = (u32 *)(vgpu_cfg_space(vgpu) + offset);
+
+	if (low) {
+		/*
+		 * only update bit 31 - bit 4,
+		 * leave the bit 3 - bit 0 unchanged.
+		 */
+		*pval = (val & GENMASK(31, 4)) | (*pval & GENMASK(3, 0));
+	} else {
+		*pval = val;
+	}
+}
+
+int intel_gvt_init_vgpu_types(struct intel_gvt *gvt);
+void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt);
+
+struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt);
+void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu);
+int intel_gvt_create_vgpu(struct intel_vgpu *vgpu,
+			  const struct intel_vgpu_config *conf);
+void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_release_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
+				 intel_engine_mask_t engine_mask);
+void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu);
+
+int intel_gvt_set_opregion(struct intel_vgpu *vgpu);
+int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num);
+
+/* validating GM functions */
+#define vgpu_gmadr_is_aperture(vgpu, gmadr) \
+	((gmadr >= vgpu_aperture_gmadr_base(vgpu)) && \
+	 (gmadr <= vgpu_aperture_gmadr_end(vgpu)))
+
+#define vgpu_gmadr_is_hidden(vgpu, gmadr) \
+	((gmadr >= vgpu_hidden_gmadr_base(vgpu)) && \
+	 (gmadr <= vgpu_hidden_gmadr_end(vgpu)))
+
+#define vgpu_gmadr_is_valid(vgpu, gmadr) \
+	 ((vgpu_gmadr_is_aperture(vgpu, gmadr) || \
+	  (vgpu_gmadr_is_hidden(vgpu, gmadr))))
+
+#define gvt_gmadr_is_aperture(gvt, gmadr) \
+	 ((gmadr >= gvt_aperture_gmadr_base(gvt)) && \
+	  (gmadr <= gvt_aperture_gmadr_end(gvt)))
+
+#define gvt_gmadr_is_hidden(gvt, gmadr) \
+	  ((gmadr >= gvt_hidden_gmadr_base(gvt)) && \
+	   (gmadr <= gvt_hidden_gmadr_end(gvt)))
+
+#define gvt_gmadr_is_valid(gvt, gmadr) \
+	  (gvt_gmadr_is_aperture(gvt, gmadr) || \
+	    gvt_gmadr_is_hidden(gvt, gmadr))
+
+bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size);
+int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr);
+int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr);
+int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
+			     unsigned long *h_index);
+int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
+			     unsigned long *g_index);
+
+void intel_vgpu_init_cfg_space(struct intel_vgpu *vgpu,
+		bool primary);
+void intel_vgpu_reset_cfg_space(struct intel_vgpu *vgpu);
+
+int intel_vgpu_emulate_cfg_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes);
+
+int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes);
+
+void intel_vgpu_emulate_hotplug(struct intel_vgpu *vgpu, bool connected);
+
+static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar)
+{
+	/* We are 64bit bar. */
+	return (*(u64 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
+			PCI_BASE_ADDRESS_MEM_MASK;
+}
+
+void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu);
+int intel_vgpu_init_opregion(struct intel_vgpu *vgpu);
+int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa);
+
+int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci);
+void populate_pvinfo_page(struct intel_vgpu *vgpu);
+
+int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload);
+void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason);
+void intel_vgpu_detach_regions(struct intel_vgpu *vgpu);
+
+enum {
+	GVT_FAILSAFE_UNSUPPORTED_GUEST,
+	GVT_FAILSAFE_INSUFFICIENT_RESOURCE,
+	GVT_FAILSAFE_GUEST_ERR,
+};
+
+static inline void mmio_hw_access_pre(struct intel_gt *gt)
+{
+	intel_runtime_pm_get(gt->uncore->rpm);
+}
+
+static inline void mmio_hw_access_post(struct intel_gt *gt)
+{
+	intel_runtime_pm_put_unchecked(gt->uncore->rpm);
+}
+
+/**
+ * intel_gvt_mmio_set_accessed - mark a MMIO has been accessed
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ */
+static inline void intel_gvt_mmio_set_accessed(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	gvt->mmio.mmio_attribute[offset >> 2] |= F_ACCESSED;
+}
+
+/**
+ * intel_gvt_mmio_is_cmd_accessible - if a MMIO could be accessed by command
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ * Returns:
+ * True if an MMIO is able to be accessed by GPU commands
+ */
+static inline bool intel_gvt_mmio_is_cmd_accessible(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_ACCESS;
+}
+
+/**
+ * intel_gvt_mmio_set_cmd_accessible -
+ *				mark a MMIO could be accessible by command
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ */
+static inline void intel_gvt_mmio_set_cmd_accessible(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_ACCESS;
+}
+
+/**
+ * intel_gvt_mmio_is_unalign - mark a MMIO could be accessed unaligned
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ */
+static inline bool intel_gvt_mmio_is_unalign(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	return gvt->mmio.mmio_attribute[offset >> 2] & F_UNALIGN;
+}
+
+/**
+ * intel_gvt_mmio_has_mode_mask - if a MMIO has a mode mask
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ * Returns:
+ * True if a MMIO has a mode mask in its higher 16 bits, false if it isn't.
+ *
+ */
+static inline bool intel_gvt_mmio_has_mode_mask(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	return gvt->mmio.mmio_attribute[offset >> 2] & F_MODE_MASK;
+}
+
+/**
+ * intel_gvt_mmio_is_sr_in_ctx -
+ *		check if an MMIO has F_SR_IN_CTX mask
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ * Returns:
+ * True if an MMIO has an F_SR_IN_CTX  mask, false if it isn't.
+ *
+ */
+static inline bool intel_gvt_mmio_is_sr_in_ctx(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	return gvt->mmio.mmio_attribute[offset >> 2] & F_SR_IN_CTX;
+}
+
+/**
+ * intel_gvt_mmio_set_sr_in_ctx -
+ *		mask an MMIO in GVT's mmio save-restore list and also
+ *		in hardware logical context image
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ */
+static inline void intel_gvt_mmio_set_sr_in_ctx(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	gvt->mmio.mmio_attribute[offset >> 2] |= F_SR_IN_CTX;
+}
+
+void intel_gvt_debugfs_add_vgpu(struct intel_vgpu *vgpu);
+/**
+ * intel_gvt_mmio_set_cmd_write_patch -
+ *				mark an MMIO if its cmd write needs to be
+ *				patched
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ */
+static inline void intel_gvt_mmio_set_cmd_write_patch(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	gvt->mmio.mmio_attribute[offset >> 2] |= F_CMD_WRITE_PATCH;
+}
+
+/**
+ * intel_gvt_mmio_is_cmd_write_patch - check if an mmio's cmd access needs to
+ * be patched
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ * Returns:
+ * True if GPU commmand write to an MMIO should be patched
+ */
+static inline bool intel_gvt_mmio_is_cmd_write_patch(
+			struct intel_gvt *gvt, unsigned int offset)
+{
+	return gvt->mmio.mmio_attribute[offset >> 2] & F_CMD_WRITE_PATCH;
+}
+
+/**
+ * intel_gvt_read_gpa - copy data from GPA to host data buffer
+ * @vgpu: a vGPU
+ * @gpa: guest physical address
+ * @buf: host data buffer
+ * @len: data length
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa,
+		void *buf, unsigned long len)
+{
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+		return -ESRCH;
+	return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, false);
+}
+
+/**
+ * intel_gvt_write_gpa - copy data from host data buffer to GPA
+ * @vgpu: a vGPU
+ * @gpa: guest physical address
+ * @buf: host data buffer
+ * @len: data length
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+static inline int intel_gvt_write_gpa(struct intel_vgpu *vgpu,
+		unsigned long gpa, void *buf, unsigned long len)
+{
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+		return -ESRCH;
+	return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, true);
+}
+
+void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_debugfs_init(struct intel_gvt *gvt);
+void intel_gvt_debugfs_clean(struct intel_gvt *gvt);
+
+int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn);
+int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn);
+int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr);
+int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		unsigned long size, dma_addr_t *dma_addr);
+void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
+		dma_addr_t dma_addr);
+
+#include "trace.h"
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
new file mode 100644
index 0000000000..a9f7fa9b90
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -0,0 +1,3237 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Eddie Dong <eddie.dong@intel.com>
+ *    Zhiyuan Lv <zhiyuan.lv@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Pei Zhang <pei.zhang@intel.com>
+ *    Niu Bing <bing.niu@intel.com>
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+
+ */
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gvt.h"
+#include "i915_pvinfo.h"
+#include "intel_mchbar_regs.h"
+#include "display/intel_display_types.h"
+#include "display/intel_dmc_regs.h"
+#include "display/intel_dp_aux_regs.h"
+#include "display/intel_dpio_phy.h"
+#include "display/intel_fbc.h"
+#include "display/intel_fdi_regs.h"
+#include "display/intel_pps_regs.h"
+#include "display/intel_psr_regs.h"
+#include "display/skl_watermark_regs.h"
+#include "display/vlv_dsi_pll_regs.h"
+#include "gt/intel_gt_regs.h"
+
+/* XXX FIXME i915 has changed PP_XXX definition */
+#define PCH_PP_STATUS  _MMIO(0xc7200)
+#define PCH_PP_CONTROL _MMIO(0xc7204)
+#define PCH_PP_ON_DELAYS _MMIO(0xc7208)
+#define PCH_PP_OFF_DELAYS _MMIO(0xc720c)
+#define PCH_PP_DIVISOR _MMIO(0xc7210)
+
+unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt)
+{
+	struct drm_i915_private *i915 = gvt->gt->i915;
+
+	if (IS_BROADWELL(i915))
+		return D_BDW;
+	else if (IS_SKYLAKE(i915))
+		return D_SKL;
+	else if (IS_KABYLAKE(i915))
+		return D_KBL;
+	else if (IS_BROXTON(i915))
+		return D_BXT;
+	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
+		return D_CFL;
+
+	return 0;
+}
+
+static bool intel_gvt_match_device(struct intel_gvt *gvt,
+		unsigned long device)
+{
+	return intel_gvt_get_device_type(gvt) & device;
+}
+
+static void read_vreg(struct intel_vgpu *vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes);
+}
+
+static void write_vreg(struct intel_vgpu *vgpu, unsigned int offset,
+	void *p_data, unsigned int bytes)
+{
+	memcpy(&vgpu_vreg(vgpu, offset), p_data, bytes);
+}
+
+struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
+						  unsigned int offset)
+{
+	struct intel_gvt_mmio_info *e;
+
+	hash_for_each_possible(gvt->mmio.mmio_info_table, e, node, offset) {
+		if (e->offset == offset)
+			return e;
+	}
+	return NULL;
+}
+
+static int setup_mmio_info(struct intel_gvt *gvt, u32 offset, u32 size,
+			   u16 flags, u32 addr_mask, u32 ro_mask, u32 device,
+			   gvt_mmio_func read, gvt_mmio_func write)
+{
+	struct intel_gvt_mmio_info *p;
+	u32 start, end, i;
+
+	if (!intel_gvt_match_device(gvt, device))
+		return 0;
+
+	if (WARN_ON(!IS_ALIGNED(offset, 4)))
+		return -EINVAL;
+
+	start = offset;
+	end = offset + size;
+
+	for (i = start; i < end; i += 4) {
+		p = intel_gvt_find_mmio_info(gvt, i);
+		if (!p) {
+			WARN(1, "assign a handler to a non-tracked mmio %x\n",
+				i);
+			return -ENODEV;
+		}
+		p->ro_mask = ro_mask;
+		gvt->mmio.mmio_attribute[i / 4] = flags;
+		if (read)
+			p->read = read;
+		if (write)
+			p->write = write;
+	}
+	return 0;
+}
+
+/**
+ * intel_gvt_render_mmio_to_engine - convert a mmio offset into the engine
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ * Returns:
+ * The engine containing the offset within its mmio page.
+ */
+const struct intel_engine_cs *
+intel_gvt_render_mmio_to_engine(struct intel_gvt *gvt, unsigned int offset)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	offset &= ~GENMASK(11, 0);
+	for_each_engine(engine, gvt->gt, id)
+		if (engine->mmio_base == offset)
+			return engine;
+
+	return NULL;
+}
+
+#define offset_to_fence_num(offset) \
+	((offset - i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0))) >> 3)
+
+#define fence_num_to_offset(num) \
+	(num * 8 + i915_mmio_reg_offset(FENCE_REG_GEN6_LO(0)))
+
+
+void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason)
+{
+	switch (reason) {
+	case GVT_FAILSAFE_UNSUPPORTED_GUEST:
+		pr_err("Detected your guest driver doesn't support GVT-g.\n");
+		break;
+	case GVT_FAILSAFE_INSUFFICIENT_RESOURCE:
+		pr_err("Graphics resource is not enough for the guest\n");
+		break;
+	case GVT_FAILSAFE_GUEST_ERR:
+		pr_err("GVT Internal error  for the guest\n");
+		break;
+	default:
+		break;
+	}
+	pr_err("Now vgpu %d will enter failsafe mode.\n", vgpu->id);
+	vgpu->failsafe = true;
+}
+
+static int sanitize_fence_mmio_access(struct intel_vgpu *vgpu,
+		unsigned int fence_num, void *p_data, unsigned int bytes)
+{
+	unsigned int max_fence = vgpu_fence_sz(vgpu);
+
+	if (fence_num >= max_fence) {
+		gvt_vgpu_err("access oob fence reg %d/%d\n",
+			     fence_num, max_fence);
+
+		/* When guest access oob fence regs without access
+		 * pv_info first, we treat guest not supporting GVT,
+		 * and we will let vgpu enter failsafe mode.
+		 */
+		if (!vgpu->pv_notified)
+			enter_failsafe_mode(vgpu,
+					GVT_FAILSAFE_UNSUPPORTED_GUEST);
+
+		memset(p_data, 0, bytes);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int gamw_echo_dev_rw_ia_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 ips = (*(u32 *)p_data) & GAMW_ECO_ENABLE_64K_IPS_FIELD;
+
+	if (GRAPHICS_VER(vgpu->gvt->gt->i915) <= 10) {
+		if (ips == GAMW_ECO_ENABLE_64K_IPS_FIELD)
+			gvt_dbg_core("vgpu%d: ips enabled\n", vgpu->id);
+		else if (!ips)
+			gvt_dbg_core("vgpu%d: ips disabled\n", vgpu->id);
+		else {
+			/* All engines must be enabled together for vGPU,
+			 * since we don't know which engine the ppgtt will
+			 * bind to when shadowing.
+			 */
+			gvt_vgpu_err("Unsupported IPS setting %x, cannot enable 64K gtt.\n",
+				     ips);
+			return -EINVAL;
+		}
+	}
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
+static int fence_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
+		void *p_data, unsigned int bytes)
+{
+	int ret;
+
+	ret = sanitize_fence_mmio_access(vgpu, offset_to_fence_num(off),
+			p_data, bytes);
+	if (ret)
+		return ret;
+	read_vreg(vgpu, off, p_data, bytes);
+	return 0;
+}
+
+static int fence_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
+		void *p_data, unsigned int bytes)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	unsigned int fence_num = offset_to_fence_num(off);
+	int ret;
+
+	ret = sanitize_fence_mmio_access(vgpu, fence_num, p_data, bytes);
+	if (ret)
+		return ret;
+	write_vreg(vgpu, off, p_data, bytes);
+
+	mmio_hw_access_pre(gvt->gt);
+	intel_vgpu_write_fence(vgpu, fence_num,
+			vgpu_vreg64(vgpu, fence_num_to_offset(fence_num)));
+	mmio_hw_access_post(gvt->gt);
+	return 0;
+}
+
+#define CALC_MODE_MASK_REG(old, new) \
+	(((new) & GENMASK(31, 16)) \
+	 | ((((old) & GENMASK(15, 0)) & ~((new) >> 16)) \
+	 | ((new) & ((new) >> 16))))
+
+static int mul_force_wake_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 old, new;
+	u32 ack_reg_offset;
+
+	old = vgpu_vreg(vgpu, offset);
+	new = CALC_MODE_MASK_REG(old, *(u32 *)p_data);
+
+	if (GRAPHICS_VER(vgpu->gvt->gt->i915)  >=  9) {
+		switch (offset) {
+		case FORCEWAKE_RENDER_GEN9_REG:
+			ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG;
+			break;
+		case FORCEWAKE_GT_GEN9_REG:
+			ack_reg_offset = FORCEWAKE_ACK_GT_GEN9_REG;
+			break;
+		case FORCEWAKE_MEDIA_GEN9_REG:
+			ack_reg_offset = FORCEWAKE_ACK_MEDIA_GEN9_REG;
+			break;
+		default:
+			/*should not hit here*/
+			gvt_vgpu_err("invalid forcewake offset 0x%x\n", offset);
+			return -EINVAL;
+		}
+	} else {
+		ack_reg_offset = FORCEWAKE_ACK_HSW_REG;
+	}
+
+	vgpu_vreg(vgpu, offset) = new;
+	vgpu_vreg(vgpu, ack_reg_offset) = (new & GENMASK(15, 0));
+	return 0;
+}
+
+static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+			    void *p_data, unsigned int bytes)
+{
+	intel_engine_mask_t engine_mask = 0;
+	u32 data;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	data = vgpu_vreg(vgpu, offset);
+
+	if (data & GEN6_GRDOM_FULL) {
+		gvt_dbg_mmio("vgpu%d: request full GPU reset\n", vgpu->id);
+		engine_mask = ALL_ENGINES;
+	} else {
+		if (data & GEN6_GRDOM_RENDER) {
+			gvt_dbg_mmio("vgpu%d: request RCS reset\n", vgpu->id);
+			engine_mask |= BIT(RCS0);
+		}
+		if (data & GEN6_GRDOM_MEDIA) {
+			gvt_dbg_mmio("vgpu%d: request VCS reset\n", vgpu->id);
+			engine_mask |= BIT(VCS0);
+		}
+		if (data & GEN6_GRDOM_BLT) {
+			gvt_dbg_mmio("vgpu%d: request BCS Reset\n", vgpu->id);
+			engine_mask |= BIT(BCS0);
+		}
+		if (data & GEN6_GRDOM_VECS) {
+			gvt_dbg_mmio("vgpu%d: request VECS Reset\n", vgpu->id);
+			engine_mask |= BIT(VECS0);
+		}
+		if (data & GEN8_GRDOM_MEDIA2) {
+			gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id);
+			engine_mask |= BIT(VCS1);
+		}
+		if (data & GEN9_GRDOM_GUC) {
+			gvt_dbg_mmio("vgpu%d: request GUC Reset\n", vgpu->id);
+			vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET;
+		}
+		engine_mask &= vgpu->gvt->gt->info.engine_mask;
+	}
+
+	/* vgpu_lock already hold by emulate mmio r/w */
+	intel_gvt_reset_vgpu_locked(vgpu, false, engine_mask);
+
+	/* sw will wait for the device to ack the reset request */
+	vgpu_vreg(vgpu, offset) = 0;
+
+	return 0;
+}
+
+static int gmbus_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	return intel_gvt_i2c_handle_gmbus_read(vgpu, offset, p_data, bytes);
+}
+
+static int gmbus_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	return intel_gvt_i2c_handle_gmbus_write(vgpu, offset, p_data, bytes);
+}
+
+static int pch_pp_control_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (vgpu_vreg(vgpu, offset) & PANEL_POWER_ON) {
+		vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_ON;
+		vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_SEQUENCE_STATE_ON_IDLE;
+		vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN;
+		vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE;
+
+	} else
+		vgpu_vreg_t(vgpu, PCH_PP_STATUS) &=
+			~(PP_ON | PP_SEQUENCE_POWER_DOWN
+					| PP_CYCLE_DELAY_ACTIVE);
+	return 0;
+}
+
+static int transconf_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (vgpu_vreg(vgpu, offset) & TRANS_ENABLE)
+		vgpu_vreg(vgpu, offset) |= TRANS_STATE_ENABLE;
+	else
+		vgpu_vreg(vgpu, offset) &= ~TRANS_STATE_ENABLE;
+	return 0;
+}
+
+static int lcpll_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (vgpu_vreg(vgpu, offset) & LCPLL_PLL_DISABLE)
+		vgpu_vreg(vgpu, offset) &= ~LCPLL_PLL_LOCK;
+	else
+		vgpu_vreg(vgpu, offset) |= LCPLL_PLL_LOCK;
+
+	if (vgpu_vreg(vgpu, offset) & LCPLL_CD_SOURCE_FCLK)
+		vgpu_vreg(vgpu, offset) |= LCPLL_CD_SOURCE_FCLK_DONE;
+	else
+		vgpu_vreg(vgpu, offset) &= ~LCPLL_CD_SOURCE_FCLK_DONE;
+
+	return 0;
+}
+
+static int dpy_reg_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	switch (offset) {
+	case 0xe651c:
+	case 0xe661c:
+	case 0xe671c:
+	case 0xe681c:
+		vgpu_vreg(vgpu, offset) = 1 << 17;
+		break;
+	case 0xe6c04:
+		vgpu_vreg(vgpu, offset) = 0x3;
+		break;
+	case 0xe6e1c:
+		vgpu_vreg(vgpu, offset) = 0x2f << 16;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	read_vreg(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
+/*
+ * Only PIPE_A is enabled in current vGPU display and PIPE_A is tied to
+ *   TRANSCODER_A in HW. DDI/PORT could be PORT_x depends on
+ *   setup_virtual_dp_monitor().
+ * emulate_monitor_status_change() set up PLL for PORT_x as the initial enabled
+ *   DPLL. Later guest driver may setup a different DPLLx when setting mode.
+ * So the correct sequence to find DP stream clock is:
+ *   Check TRANS_DDI_FUNC_CTL on TRANSCODER_A to get PORT_x.
+ *   Check correct PLLx for PORT_x to get PLL frequency and DP bitrate.
+ * Then Refresh rate then can be calculated based on follow equations:
+ *   Pixel clock = h_total * v_total * refresh_rate
+ *   stream clock = Pixel clock
+ *   ls_clk = DP bitrate
+ *   Link M/N = strm_clk / ls_clk
+ */
+
+static u32 bdw_vgpu_get_dp_bitrate(struct intel_vgpu *vgpu, enum port port)
+{
+	u32 dp_br = 0;
+	u32 ddi_pll_sel = vgpu_vreg_t(vgpu, PORT_CLK_SEL(port));
+
+	switch (ddi_pll_sel) {
+	case PORT_CLK_SEL_LCPLL_2700:
+		dp_br = 270000 * 2;
+		break;
+	case PORT_CLK_SEL_LCPLL_1350:
+		dp_br = 135000 * 2;
+		break;
+	case PORT_CLK_SEL_LCPLL_810:
+		dp_br = 81000 * 2;
+		break;
+	case PORT_CLK_SEL_SPLL:
+	{
+		switch (vgpu_vreg_t(vgpu, SPLL_CTL) & SPLL_FREQ_MASK) {
+		case SPLL_FREQ_810MHz:
+			dp_br = 81000 * 2;
+			break;
+		case SPLL_FREQ_1350MHz:
+			dp_br = 135000 * 2;
+			break;
+		case SPLL_FREQ_2700MHz:
+			dp_br = 270000 * 2;
+			break;
+		default:
+			gvt_dbg_dpy("vgpu-%d PORT_%c can't get freq from SPLL 0x%08x\n",
+				    vgpu->id, port_name(port), vgpu_vreg_t(vgpu, SPLL_CTL));
+			break;
+		}
+		break;
+	}
+	case PORT_CLK_SEL_WRPLL1:
+	case PORT_CLK_SEL_WRPLL2:
+	{
+		u32 wrpll_ctl;
+		int refclk, n, p, r;
+
+		if (ddi_pll_sel == PORT_CLK_SEL_WRPLL1)
+			wrpll_ctl = vgpu_vreg_t(vgpu, WRPLL_CTL(DPLL_ID_WRPLL1));
+		else
+			wrpll_ctl = vgpu_vreg_t(vgpu, WRPLL_CTL(DPLL_ID_WRPLL2));
+
+		switch (wrpll_ctl & WRPLL_REF_MASK) {
+		case WRPLL_REF_PCH_SSC:
+			refclk = vgpu->gvt->gt->i915->display.dpll.ref_clks.ssc;
+			break;
+		case WRPLL_REF_LCPLL:
+			refclk = 2700000;
+			break;
+		default:
+			gvt_dbg_dpy("vgpu-%d PORT_%c WRPLL can't get refclk 0x%08x\n",
+				    vgpu->id, port_name(port), wrpll_ctl);
+			goto out;
+		}
+
+		r = wrpll_ctl & WRPLL_DIVIDER_REF_MASK;
+		p = (wrpll_ctl & WRPLL_DIVIDER_POST_MASK) >> WRPLL_DIVIDER_POST_SHIFT;
+		n = (wrpll_ctl & WRPLL_DIVIDER_FB_MASK) >> WRPLL_DIVIDER_FB_SHIFT;
+
+		dp_br = (refclk * n / 10) / (p * r) * 2;
+		break;
+	}
+	default:
+		gvt_dbg_dpy("vgpu-%d PORT_%c has invalid clock select 0x%08x\n",
+			    vgpu->id, port_name(port), vgpu_vreg_t(vgpu, PORT_CLK_SEL(port)));
+		break;
+	}
+
+out:
+	return dp_br;
+}
+
+static u32 bxt_vgpu_get_dp_bitrate(struct intel_vgpu *vgpu, enum port port)
+{
+	u32 dp_br = 0;
+	int refclk = vgpu->gvt->gt->i915->display.dpll.ref_clks.nssc;
+	enum dpio_phy phy = DPIO_PHY0;
+	enum dpio_channel ch = DPIO_CH0;
+	struct dpll clock = {0};
+	u32 temp;
+
+	/* Port to PHY mapping is fixed, see bxt_ddi_phy_info{} */
+	switch (port) {
+	case PORT_A:
+		phy = DPIO_PHY1;
+		ch = DPIO_CH0;
+		break;
+	case PORT_B:
+		phy = DPIO_PHY0;
+		ch = DPIO_CH0;
+		break;
+	case PORT_C:
+		phy = DPIO_PHY0;
+		ch = DPIO_CH1;
+		break;
+	default:
+		gvt_dbg_dpy("vgpu-%d no PHY for PORT_%c\n", vgpu->id, port_name(port));
+		goto out;
+	}
+
+	temp = vgpu_vreg_t(vgpu, BXT_PORT_PLL_ENABLE(port));
+	if (!(temp & PORT_PLL_ENABLE) || !(temp & PORT_PLL_LOCK)) {
+		gvt_dbg_dpy("vgpu-%d PORT_%c PLL_ENABLE 0x%08x isn't enabled or locked\n",
+			    vgpu->id, port_name(port), temp);
+		goto out;
+	}
+
+	clock.m1 = 2;
+	clock.m2 = REG_FIELD_GET(PORT_PLL_M2_INT_MASK,
+				 vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 0))) << 22;
+	if (vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 3)) & PORT_PLL_M2_FRAC_ENABLE)
+		clock.m2 |= REG_FIELD_GET(PORT_PLL_M2_FRAC_MASK,
+					  vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 2)));
+	clock.n = REG_FIELD_GET(PORT_PLL_N_MASK,
+				vgpu_vreg_t(vgpu, BXT_PORT_PLL(phy, ch, 1)));
+	clock.p1 = REG_FIELD_GET(PORT_PLL_P1_MASK,
+				 vgpu_vreg_t(vgpu, BXT_PORT_PLL_EBB_0(phy, ch)));
+	clock.p2 = REG_FIELD_GET(PORT_PLL_P2_MASK,
+				 vgpu_vreg_t(vgpu, BXT_PORT_PLL_EBB_0(phy, ch)));
+	clock.m = clock.m1 * clock.m2;
+	clock.p = clock.p1 * clock.p2 * 5;
+
+	if (clock.n == 0 || clock.p == 0) {
+		gvt_dbg_dpy("vgpu-%d PORT_%c PLL has invalid divider\n", vgpu->id, port_name(port));
+		goto out;
+	}
+
+	clock.vco = DIV_ROUND_CLOSEST_ULL(mul_u32_u32(refclk, clock.m), clock.n << 22);
+	clock.dot = DIV_ROUND_CLOSEST(clock.vco, clock.p);
+
+	dp_br = clock.dot;
+
+out:
+	return dp_br;
+}
+
+static u32 skl_vgpu_get_dp_bitrate(struct intel_vgpu *vgpu, enum port port)
+{
+	u32 dp_br = 0;
+	enum intel_dpll_id dpll_id = DPLL_ID_SKL_DPLL0;
+
+	/* Find the enabled DPLL for the DDI/PORT */
+	if (!(vgpu_vreg_t(vgpu, DPLL_CTRL2) & DPLL_CTRL2_DDI_CLK_OFF(port)) &&
+	    (vgpu_vreg_t(vgpu, DPLL_CTRL2) & DPLL_CTRL2_DDI_SEL_OVERRIDE(port))) {
+		dpll_id += (vgpu_vreg_t(vgpu, DPLL_CTRL2) &
+			DPLL_CTRL2_DDI_CLK_SEL_MASK(port)) >>
+			DPLL_CTRL2_DDI_CLK_SEL_SHIFT(port);
+	} else {
+		gvt_dbg_dpy("vgpu-%d DPLL for PORT_%c isn't turned on\n",
+			    vgpu->id, port_name(port));
+		return dp_br;
+	}
+
+	/* Find PLL output frequency from correct DPLL, and get bir rate */
+	switch ((vgpu_vreg_t(vgpu, DPLL_CTRL1) &
+		DPLL_CTRL1_LINK_RATE_MASK(dpll_id)) >>
+		DPLL_CTRL1_LINK_RATE_SHIFT(dpll_id)) {
+		case DPLL_CTRL1_LINK_RATE_810:
+			dp_br = 81000 * 2;
+			break;
+		case DPLL_CTRL1_LINK_RATE_1080:
+			dp_br = 108000 * 2;
+			break;
+		case DPLL_CTRL1_LINK_RATE_1350:
+			dp_br = 135000 * 2;
+			break;
+		case DPLL_CTRL1_LINK_RATE_1620:
+			dp_br = 162000 * 2;
+			break;
+		case DPLL_CTRL1_LINK_RATE_2160:
+			dp_br = 216000 * 2;
+			break;
+		case DPLL_CTRL1_LINK_RATE_2700:
+			dp_br = 270000 * 2;
+			break;
+		default:
+			dp_br = 0;
+			gvt_dbg_dpy("vgpu-%d PORT_%c fail to get DPLL-%d freq\n",
+				    vgpu->id, port_name(port), dpll_id);
+	}
+
+	return dp_br;
+}
+
+static void vgpu_update_refresh_rate(struct intel_vgpu *vgpu)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	enum port port;
+	u32 dp_br, link_m, link_n, htotal, vtotal;
+
+	/* Find DDI/PORT assigned to TRANSCODER_A, expect B or D */
+	port = (vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &
+		TRANS_DDI_PORT_MASK) >> TRANS_DDI_PORT_SHIFT;
+	if (port != PORT_B && port != PORT_D) {
+		gvt_dbg_dpy("vgpu-%d unsupported PORT_%c\n", vgpu->id, port_name(port));
+		return;
+	}
+
+	/* Calculate DP bitrate from PLL */
+	if (IS_BROADWELL(dev_priv))
+		dp_br = bdw_vgpu_get_dp_bitrate(vgpu, port);
+	else if (IS_BROXTON(dev_priv))
+		dp_br = bxt_vgpu_get_dp_bitrate(vgpu, port);
+	else
+		dp_br = skl_vgpu_get_dp_bitrate(vgpu, port);
+
+	/* Get DP link symbol clock M/N */
+	link_m = vgpu_vreg_t(vgpu, PIPE_LINK_M1(TRANSCODER_A));
+	link_n = vgpu_vreg_t(vgpu, PIPE_LINK_N1(TRANSCODER_A));
+
+	/* Get H/V total from transcoder timing */
+	htotal = (vgpu_vreg_t(vgpu, TRANS_HTOTAL(TRANSCODER_A)) >> TRANS_HTOTAL_SHIFT);
+	vtotal = (vgpu_vreg_t(vgpu, TRANS_VTOTAL(TRANSCODER_A)) >> TRANS_VTOTAL_SHIFT);
+
+	if (dp_br && link_n && htotal && vtotal) {
+		u64 pixel_clk = 0;
+		u32 new_rate = 0;
+		u32 *old_rate = &(intel_vgpu_port(vgpu, vgpu->display.port_num)->vrefresh_k);
+
+		/* Calcuate pixel clock by (ls_clk * M / N) */
+		pixel_clk = div_u64(mul_u32_u32(link_m, dp_br), link_n);
+		pixel_clk *= MSEC_PER_SEC;
+
+		/* Calcuate refresh rate by (pixel_clk / (h_total * v_total)) */
+		new_rate = DIV64_U64_ROUND_CLOSEST(mul_u64_u32_shr(pixel_clk, MSEC_PER_SEC, 0), mul_u32_u32(htotal + 1, vtotal + 1));
+
+		if (*old_rate != new_rate)
+			*old_rate = new_rate;
+
+		gvt_dbg_dpy("vgpu-%d PIPE_%c refresh rate updated to %d\n",
+			    vgpu->id, pipe_name(PIPE_A), new_rate);
+	}
+}
+
+static int pipeconf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 data;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	data = vgpu_vreg(vgpu, offset);
+
+	if (data & TRANSCONF_ENABLE) {
+		vgpu_vreg(vgpu, offset) |= TRANSCONF_STATE_ENABLE;
+		vgpu_update_refresh_rate(vgpu);
+		vgpu_update_vblank_emulation(vgpu, true);
+	} else {
+		vgpu_vreg(vgpu, offset) &= ~TRANSCONF_STATE_ENABLE;
+		vgpu_update_vblank_emulation(vgpu, false);
+	}
+	return 0;
+}
+
+/* sorted in ascending order */
+static i915_reg_t force_nonpriv_white_list[] = {
+	_MMIO(0xd80),
+	GEN9_CS_DEBUG_MODE1, //_MMIO(0x20ec)
+	GEN9_CTX_PREEMPT_REG,//_MMIO(0x2248)
+	CL_PRIMITIVES_COUNT, //_MMIO(0x2340)
+	PS_INVOCATION_COUNT, //_MMIO(0x2348)
+	PS_DEPTH_COUNT, //_MMIO(0x2350)
+	GEN8_CS_CHICKEN1,//_MMIO(0x2580)
+	_MMIO(0x2690),
+	_MMIO(0x2694),
+	_MMIO(0x2698),
+	_MMIO(0x2754),
+	_MMIO(0x28a0),
+	_MMIO(0x4de0),
+	_MMIO(0x4de4),
+	_MMIO(0x4dfc),
+	GEN7_COMMON_SLICE_CHICKEN1,//_MMIO(0x7010)
+	_MMIO(0x7014),
+	HDC_CHICKEN0,//_MMIO(0x7300)
+	GEN8_HDC_CHICKEN1,//_MMIO(0x7304)
+	_MMIO(0x7700),
+	_MMIO(0x7704),
+	_MMIO(0x7708),
+	_MMIO(0x770c),
+	_MMIO(0x83a8),
+	_MMIO(0xb110),
+	_MMIO(0xb118),
+	_MMIO(0xe100),
+	_MMIO(0xe18c),
+	_MMIO(0xe48c),
+	_MMIO(0xe5f4),
+	_MMIO(0x64844),
+};
+
+/* a simple bsearch */
+static inline bool in_whitelist(u32 reg)
+{
+	int left = 0, right = ARRAY_SIZE(force_nonpriv_white_list);
+	i915_reg_t *array = force_nonpriv_white_list;
+
+	while (left < right) {
+		int mid = (left + right)/2;
+
+		if (reg > array[mid].reg)
+			left = mid + 1;
+		else if (reg < array[mid].reg)
+			right = mid;
+		else
+			return true;
+	}
+	return false;
+}
+
+static int force_nonpriv_write(struct intel_vgpu *vgpu,
+	unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 reg_nonpriv = (*(u32 *)p_data) & REG_GENMASK(25, 2);
+	const struct intel_engine_cs *engine =
+		intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
+
+	if (bytes != 4 || !IS_ALIGNED(offset, bytes) || !engine) {
+		gvt_err("vgpu(%d) Invalid FORCE_NONPRIV offset %x(%dB)\n",
+			vgpu->id, offset, bytes);
+		return -EINVAL;
+	}
+
+	if (!in_whitelist(reg_nonpriv) &&
+	    reg_nonpriv != i915_mmio_reg_offset(RING_NOPID(engine->mmio_base))) {
+		gvt_err("vgpu(%d) Invalid FORCE_NONPRIV write %x at offset %x\n",
+			vgpu->id, reg_nonpriv, offset);
+	} else
+		intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes);
+
+	return 0;
+}
+
+static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (vgpu_vreg(vgpu, offset) & DDI_BUF_CTL_ENABLE) {
+		vgpu_vreg(vgpu, offset) &= ~DDI_BUF_IS_IDLE;
+	} else {
+		vgpu_vreg(vgpu, offset) |= DDI_BUF_IS_IDLE;
+		if (offset == i915_mmio_reg_offset(DDI_BUF_CTL(PORT_E)))
+			vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E))
+				&= ~DP_TP_STATUS_AUTOTRAIN_DONE;
+	}
+	return 0;
+}
+
+static int fdi_rx_iir_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	vgpu_vreg(vgpu, offset) &= ~*(u32 *)p_data;
+	return 0;
+}
+
+#define FDI_LINK_TRAIN_PATTERN1         0
+#define FDI_LINK_TRAIN_PATTERN2         1
+
+static int fdi_auto_training_started(struct intel_vgpu *vgpu)
+{
+	u32 ddi_buf_ctl = vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_E));
+	u32 rx_ctl = vgpu_vreg(vgpu, _FDI_RXA_CTL);
+	u32 tx_ctl = vgpu_vreg_t(vgpu, DP_TP_CTL(PORT_E));
+
+	if ((ddi_buf_ctl & DDI_BUF_CTL_ENABLE) &&
+			(rx_ctl & FDI_RX_ENABLE) &&
+			(rx_ctl & FDI_AUTO_TRAINING) &&
+			(tx_ctl & DP_TP_CTL_ENABLE) &&
+			(tx_ctl & DP_TP_CTL_FDI_AUTOTRAIN))
+		return 1;
+	else
+		return 0;
+}
+
+static int check_fdi_rx_train_status(struct intel_vgpu *vgpu,
+		enum pipe pipe, unsigned int train_pattern)
+{
+	i915_reg_t fdi_rx_imr, fdi_tx_ctl, fdi_rx_ctl;
+	unsigned int fdi_rx_check_bits, fdi_tx_check_bits;
+	unsigned int fdi_rx_train_bits, fdi_tx_train_bits;
+	unsigned int fdi_iir_check_bits;
+
+	fdi_rx_imr = FDI_RX_IMR(pipe);
+	fdi_tx_ctl = FDI_TX_CTL(pipe);
+	fdi_rx_ctl = FDI_RX_CTL(pipe);
+
+	if (train_pattern == FDI_LINK_TRAIN_PATTERN1) {
+		fdi_rx_train_bits = FDI_LINK_TRAIN_PATTERN_1_CPT;
+		fdi_tx_train_bits = FDI_LINK_TRAIN_PATTERN_1;
+		fdi_iir_check_bits = FDI_RX_BIT_LOCK;
+	} else if (train_pattern == FDI_LINK_TRAIN_PATTERN2) {
+		fdi_rx_train_bits = FDI_LINK_TRAIN_PATTERN_2_CPT;
+		fdi_tx_train_bits = FDI_LINK_TRAIN_PATTERN_2;
+		fdi_iir_check_bits = FDI_RX_SYMBOL_LOCK;
+	} else {
+		gvt_vgpu_err("Invalid train pattern %d\n", train_pattern);
+		return -EINVAL;
+	}
+
+	fdi_rx_check_bits = FDI_RX_ENABLE | fdi_rx_train_bits;
+	fdi_tx_check_bits = FDI_TX_ENABLE | fdi_tx_train_bits;
+
+	/* If imr bit has been masked */
+	if (vgpu_vreg_t(vgpu, fdi_rx_imr) & fdi_iir_check_bits)
+		return 0;
+
+	if (((vgpu_vreg_t(vgpu, fdi_tx_ctl) & fdi_tx_check_bits)
+			== fdi_tx_check_bits)
+		&& ((vgpu_vreg_t(vgpu, fdi_rx_ctl) & fdi_rx_check_bits)
+			== fdi_rx_check_bits))
+		return 1;
+	else
+		return 0;
+}
+
+#define INVALID_INDEX (~0U)
+
+static unsigned int calc_index(unsigned int offset, unsigned int start,
+	unsigned int next, unsigned int end, i915_reg_t i915_end)
+{
+	unsigned int range = next - start;
+
+	if (!end)
+		end = i915_mmio_reg_offset(i915_end);
+	if (offset < start || offset > end)
+		return INVALID_INDEX;
+	offset -= start;
+	return offset / range;
+}
+
+#define FDI_RX_CTL_TO_PIPE(offset) \
+	calc_index(offset, _FDI_RXA_CTL, _FDI_RXB_CTL, 0, FDI_RX_CTL(PIPE_C))
+
+#define FDI_TX_CTL_TO_PIPE(offset) \
+	calc_index(offset, _FDI_TXA_CTL, _FDI_TXB_CTL, 0, FDI_TX_CTL(PIPE_C))
+
+#define FDI_RX_IMR_TO_PIPE(offset) \
+	calc_index(offset, _FDI_RXA_IMR, _FDI_RXB_IMR, 0, FDI_RX_IMR(PIPE_C))
+
+static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	i915_reg_t fdi_rx_iir;
+	unsigned int index;
+	int ret;
+
+	if (FDI_RX_CTL_TO_PIPE(offset) != INVALID_INDEX)
+		index = FDI_RX_CTL_TO_PIPE(offset);
+	else if (FDI_TX_CTL_TO_PIPE(offset) != INVALID_INDEX)
+		index = FDI_TX_CTL_TO_PIPE(offset);
+	else if (FDI_RX_IMR_TO_PIPE(offset) != INVALID_INDEX)
+		index = FDI_RX_IMR_TO_PIPE(offset);
+	else {
+		gvt_vgpu_err("Unsupported registers %x\n", offset);
+		return -EINVAL;
+	}
+
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	fdi_rx_iir = FDI_RX_IIR(index);
+
+	ret = check_fdi_rx_train_status(vgpu, index, FDI_LINK_TRAIN_PATTERN1);
+	if (ret < 0)
+		return ret;
+	if (ret)
+		vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_BIT_LOCK;
+
+	ret = check_fdi_rx_train_status(vgpu, index, FDI_LINK_TRAIN_PATTERN2);
+	if (ret < 0)
+		return ret;
+	if (ret)
+		vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK;
+
+	if (offset == _FDI_RXA_CTL)
+		if (fdi_auto_training_started(vgpu))
+			vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E)) |=
+				DP_TP_STATUS_AUTOTRAIN_DONE;
+	return 0;
+}
+
+#define DP_TP_CTL_TO_PORT(offset) \
+	calc_index(offset, _DP_TP_CTL_A, _DP_TP_CTL_B, 0, DP_TP_CTL(PORT_E))
+
+static int dp_tp_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	i915_reg_t status_reg;
+	unsigned int index;
+	u32 data;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	index = DP_TP_CTL_TO_PORT(offset);
+	data = (vgpu_vreg(vgpu, offset) & GENMASK(10, 8)) >> 8;
+	if (data == 0x2) {
+		status_reg = DP_TP_STATUS(index);
+		vgpu_vreg_t(vgpu, status_reg) |= (1 << 25);
+	}
+	return 0;
+}
+
+static int dp_tp_status_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 reg_val;
+	u32 sticky_mask;
+
+	reg_val = *((u32 *)p_data);
+	sticky_mask = GENMASK(27, 26) | (1 << 24);
+
+	vgpu_vreg(vgpu, offset) = (reg_val & ~sticky_mask) |
+		(vgpu_vreg(vgpu, offset) & sticky_mask);
+	vgpu_vreg(vgpu, offset) &= ~(reg_val & sticky_mask);
+	return 0;
+}
+
+static int pch_adpa_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 data;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	data = vgpu_vreg(vgpu, offset);
+
+	if (data & ADPA_CRT_HOTPLUG_FORCE_TRIGGER)
+		vgpu_vreg(vgpu, offset) &= ~ADPA_CRT_HOTPLUG_FORCE_TRIGGER;
+	return 0;
+}
+
+static int south_chicken2_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 data;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	data = vgpu_vreg(vgpu, offset);
+
+	if (data & FDI_MPHY_IOSFSB_RESET_CTL)
+		vgpu_vreg(vgpu, offset) |= FDI_MPHY_IOSFSB_RESET_STATUS;
+	else
+		vgpu_vreg(vgpu, offset) &= ~FDI_MPHY_IOSFSB_RESET_STATUS;
+	return 0;
+}
+
+#define DSPSURF_TO_PIPE(offset) \
+	calc_index(offset, _DSPASURF, _DSPBSURF, 0, DSPSURF(PIPE_C))
+
+static int pri_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	u32 pipe = DSPSURF_TO_PIPE(offset);
+	int event = SKL_FLIP_EVENT(pipe, PLANE_PRIMARY);
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	vgpu_vreg_t(vgpu, DSPSURFLIVE(pipe)) = vgpu_vreg(vgpu, offset);
+
+	vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++;
+
+	if (vgpu_vreg_t(vgpu, DSPCNTR(pipe)) & PLANE_CTL_ASYNC_FLIP)
+		intel_vgpu_trigger_virtual_event(vgpu, event);
+	else
+		set_bit(event, vgpu->irq.flip_done_event[pipe]);
+
+	return 0;
+}
+
+#define SPRSURF_TO_PIPE(offset) \
+	calc_index(offset, _SPRA_SURF, _SPRB_SURF, 0, SPRSURF(PIPE_C))
+
+static int spr_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 pipe = SPRSURF_TO_PIPE(offset);
+	int event = SKL_FLIP_EVENT(pipe, PLANE_SPRITE0);
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	vgpu_vreg_t(vgpu, SPRSURFLIVE(pipe)) = vgpu_vreg(vgpu, offset);
+
+	if (vgpu_vreg_t(vgpu, SPRCTL(pipe)) & PLANE_CTL_ASYNC_FLIP)
+		intel_vgpu_trigger_virtual_event(vgpu, event);
+	else
+		set_bit(event, vgpu->irq.flip_done_event[pipe]);
+
+	return 0;
+}
+
+static int reg50080_mmio_write(struct intel_vgpu *vgpu,
+			       unsigned int offset, void *p_data,
+			       unsigned int bytes)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	enum pipe pipe = REG_50080_TO_PIPE(offset);
+	enum plane_id plane = REG_50080_TO_PLANE(offset);
+	int event = SKL_FLIP_EVENT(pipe, plane);
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	if (plane == PLANE_PRIMARY) {
+		vgpu_vreg_t(vgpu, DSPSURFLIVE(pipe)) = vgpu_vreg(vgpu, offset);
+		vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++;
+	} else {
+		vgpu_vreg_t(vgpu, SPRSURFLIVE(pipe)) = vgpu_vreg(vgpu, offset);
+	}
+
+	if ((vgpu_vreg(vgpu, offset) & REG50080_FLIP_TYPE_MASK) == REG50080_FLIP_TYPE_ASYNC)
+		intel_vgpu_trigger_virtual_event(vgpu, event);
+	else
+		set_bit(event, vgpu->irq.flip_done_event[pipe]);
+
+	return 0;
+}
+
+static int trigger_aux_channel_interrupt(struct intel_vgpu *vgpu,
+		unsigned int reg)
+{
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	enum intel_gvt_event_type event;
+
+	if (reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_A)))
+		event = AUX_CHANNEL_A;
+	else if (reg == _PCH_DPB_AUX_CH_CTL ||
+		 reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_B)))
+		event = AUX_CHANNEL_B;
+	else if (reg == _PCH_DPC_AUX_CH_CTL ||
+		 reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_C)))
+		event = AUX_CHANNEL_C;
+	else if (reg == _PCH_DPD_AUX_CH_CTL ||
+		 reg == i915_mmio_reg_offset(DP_AUX_CH_CTL(AUX_CH_D)))
+		event = AUX_CHANNEL_D;
+	else {
+		drm_WARN_ON(&dev_priv->drm, true);
+		return -EINVAL;
+	}
+
+	intel_vgpu_trigger_virtual_event(vgpu, event);
+	return 0;
+}
+
+static int dp_aux_ch_ctl_trans_done(struct intel_vgpu *vgpu, u32 value,
+		unsigned int reg, int len, bool data_valid)
+{
+	/* mark transaction done */
+	value |= DP_AUX_CH_CTL_DONE;
+	value &= ~DP_AUX_CH_CTL_SEND_BUSY;
+	value &= ~DP_AUX_CH_CTL_RECEIVE_ERROR;
+
+	if (data_valid)
+		value &= ~DP_AUX_CH_CTL_TIME_OUT_ERROR;
+	else
+		value |= DP_AUX_CH_CTL_TIME_OUT_ERROR;
+
+	/* message size */
+	value &= ~(0xf << 20);
+	value |= (len << 20);
+	vgpu_vreg(vgpu, reg) = value;
+
+	if (value & DP_AUX_CH_CTL_INTERRUPT)
+		return trigger_aux_channel_interrupt(vgpu, reg);
+	return 0;
+}
+
+static void dp_aux_ch_ctl_link_training(struct intel_vgpu_dpcd_data *dpcd,
+		u8 t)
+{
+	if ((t & DPCD_TRAINING_PATTERN_SET_MASK) == DPCD_TRAINING_PATTERN_1) {
+		/* training pattern 1 for CR */
+		/* set LANE0_CR_DONE, LANE1_CR_DONE */
+		dpcd->data[DPCD_LANE0_1_STATUS] |= DPCD_LANES_CR_DONE;
+		/* set LANE2_CR_DONE, LANE3_CR_DONE */
+		dpcd->data[DPCD_LANE2_3_STATUS] |= DPCD_LANES_CR_DONE;
+	} else if ((t & DPCD_TRAINING_PATTERN_SET_MASK) ==
+			DPCD_TRAINING_PATTERN_2) {
+		/* training pattern 2 for EQ */
+		/* Set CHANNEL_EQ_DONE and  SYMBOL_LOCKED for Lane0_1 */
+		dpcd->data[DPCD_LANE0_1_STATUS] |= DPCD_LANES_EQ_DONE;
+		dpcd->data[DPCD_LANE0_1_STATUS] |= DPCD_SYMBOL_LOCKED;
+		/* Set CHANNEL_EQ_DONE and  SYMBOL_LOCKED for Lane2_3 */
+		dpcd->data[DPCD_LANE2_3_STATUS] |= DPCD_LANES_EQ_DONE;
+		dpcd->data[DPCD_LANE2_3_STATUS] |= DPCD_SYMBOL_LOCKED;
+		/* set INTERLANE_ALIGN_DONE */
+		dpcd->data[DPCD_LANE_ALIGN_STATUS_UPDATED] |=
+			DPCD_INTERLANE_ALIGN_DONE;
+	} else if ((t & DPCD_TRAINING_PATTERN_SET_MASK) ==
+			DPCD_LINK_TRAINING_DISABLED) {
+		/* finish link training */
+		/* set sink status as synchronized */
+		dpcd->data[DPCD_SINK_STATUS] = DPCD_SINK_IN_SYNC;
+	}
+}
+
+#define _REG_HSW_DP_AUX_CH_CTL(dp) \
+	((dp) ? (_PCH_DPB_AUX_CH_CTL + ((dp)-1)*0x100) : 0x64010)
+
+#define _REG_SKL_DP_AUX_CH_CTL(dp) (0x64010 + (dp) * 0x100)
+
+#define OFFSET_TO_DP_AUX_PORT(offset) (((offset) & 0xF00) >> 8)
+
+#define dpy_is_valid_port(port)	\
+		(((port) >= PORT_A) && ((port) < I915_MAX_PORTS))
+
+static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	struct intel_vgpu_display *display = &vgpu->display;
+	int msg, addr, ctrl, op, len;
+	int port_index = OFFSET_TO_DP_AUX_PORT(offset);
+	struct intel_vgpu_dpcd_data *dpcd = NULL;
+	struct intel_vgpu_port *port = NULL;
+	u32 data;
+
+	if (!dpy_is_valid_port(port_index)) {
+		gvt_vgpu_err("Unsupported DP port access!\n");
+		return 0;
+	}
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	data = vgpu_vreg(vgpu, offset);
+
+	if ((GRAPHICS_VER(vgpu->gvt->gt->i915) >= 9)
+		&& offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) {
+		/* SKL DPB/C/D aux ctl register changed */
+		return 0;
+	} else if (IS_BROADWELL(vgpu->gvt->gt->i915) &&
+		   offset != _REG_HSW_DP_AUX_CH_CTL(port_index)) {
+		/* write to the data registers */
+		return 0;
+	}
+
+	if (!(data & DP_AUX_CH_CTL_SEND_BUSY)) {
+		/* just want to clear the sticky bits */
+		vgpu_vreg(vgpu, offset) = 0;
+		return 0;
+	}
+
+	port = &display->ports[port_index];
+	dpcd = port->dpcd;
+
+	/* read out message from DATA1 register */
+	msg = vgpu_vreg(vgpu, offset + 4);
+	addr = (msg >> 8) & 0xffff;
+	ctrl = (msg >> 24) & 0xff;
+	len = msg & 0xff;
+	op = ctrl >> 4;
+
+	if (op == GVT_AUX_NATIVE_WRITE) {
+		int t;
+		u8 buf[16];
+
+		if ((addr + len + 1) >= DPCD_SIZE) {
+			/*
+			 * Write request exceeds what we supported,
+			 * DCPD spec: When a Source Device is writing a DPCD
+			 * address not supported by the Sink Device, the Sink
+			 * Device shall reply with AUX NACK and “M” equal to
+			 * zero.
+			 */
+
+			/* NAK the write */
+			vgpu_vreg(vgpu, offset + 4) = AUX_NATIVE_REPLY_NAK;
+			dp_aux_ch_ctl_trans_done(vgpu, data, offset, 2, true);
+			return 0;
+		}
+
+		/*
+		 * Write request format: Headr (command + address + size) occupies
+		 * 4 bytes, followed by (len + 1) bytes of data. See details at
+		 * intel_dp_aux_transfer().
+		 */
+		if ((len + 1 + 4) > AUX_BURST_SIZE) {
+			gvt_vgpu_err("dp_aux_header: len %d is too large\n", len);
+			return -EINVAL;
+		}
+
+		/* unpack data from vreg to buf */
+		for (t = 0; t < 4; t++) {
+			u32 r = vgpu_vreg(vgpu, offset + 8 + t * 4);
+
+			buf[t * 4] = (r >> 24) & 0xff;
+			buf[t * 4 + 1] = (r >> 16) & 0xff;
+			buf[t * 4 + 2] = (r >> 8) & 0xff;
+			buf[t * 4 + 3] = r & 0xff;
+		}
+
+		/* write to virtual DPCD */
+		if (dpcd && dpcd->data_valid) {
+			for (t = 0; t <= len; t++) {
+				int p = addr + t;
+
+				dpcd->data[p] = buf[t];
+				/* check for link training */
+				if (p == DPCD_TRAINING_PATTERN_SET)
+					dp_aux_ch_ctl_link_training(dpcd,
+							buf[t]);
+			}
+		}
+
+		/* ACK the write */
+		vgpu_vreg(vgpu, offset + 4) = 0;
+		dp_aux_ch_ctl_trans_done(vgpu, data, offset, 1,
+				dpcd && dpcd->data_valid);
+		return 0;
+	}
+
+	if (op == GVT_AUX_NATIVE_READ) {
+		int idx, i, ret = 0;
+
+		if ((addr + len + 1) >= DPCD_SIZE) {
+			/*
+			 * read request exceeds what we supported
+			 * DPCD spec: A Sink Device receiving a Native AUX CH
+			 * read request for an unsupported DPCD address must
+			 * reply with an AUX ACK and read data set equal to
+			 * zero instead of replying with AUX NACK.
+			 */
+
+			/* ACK the READ*/
+			vgpu_vreg(vgpu, offset + 4) = 0;
+			vgpu_vreg(vgpu, offset + 8) = 0;
+			vgpu_vreg(vgpu, offset + 12) = 0;
+			vgpu_vreg(vgpu, offset + 16) = 0;
+			vgpu_vreg(vgpu, offset + 20) = 0;
+
+			dp_aux_ch_ctl_trans_done(vgpu, data, offset, len + 2,
+					true);
+			return 0;
+		}
+
+		for (idx = 1; idx <= 5; idx++) {
+			/* clear the data registers */
+			vgpu_vreg(vgpu, offset + 4 * idx) = 0;
+		}
+
+		/*
+		 * Read reply format: ACK (1 byte) plus (len + 1) bytes of data.
+		 */
+		if ((len + 2) > AUX_BURST_SIZE) {
+			gvt_vgpu_err("dp_aux_header: len %d is too large\n", len);
+			return -EINVAL;
+		}
+
+		/* read from virtual DPCD to vreg */
+		/* first 4 bytes: [ACK][addr][addr+1][addr+2] */
+		if (dpcd && dpcd->data_valid) {
+			for (i = 1; i <= (len + 1); i++) {
+				int t;
+
+				t = dpcd->data[addr + i - 1];
+				t <<= (24 - 8 * (i % 4));
+				ret |= t;
+
+				if ((i % 4 == 3) || (i == (len + 1))) {
+					vgpu_vreg(vgpu, offset +
+							(i / 4 + 1) * 4) = ret;
+					ret = 0;
+				}
+			}
+		}
+		dp_aux_ch_ctl_trans_done(vgpu, data, offset, len + 2,
+				dpcd && dpcd->data_valid);
+		return 0;
+	}
+
+	/* i2c transaction starts */
+	intel_gvt_i2c_handle_aux_ch_write(vgpu, port_index, offset, p_data);
+
+	if (data & DP_AUX_CH_CTL_INTERRUPT)
+		trigger_aux_channel_interrupt(vgpu, offset);
+	return 0;
+}
+
+static int mbctl_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	*(u32 *)p_data &= (~GEN6_MBCTL_ENABLE_BOOT_FETCH);
+	write_vreg(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
+static int vga_control_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	bool vga_disable;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	vga_disable = vgpu_vreg(vgpu, offset) & VGA_DISP_DISABLE;
+
+	gvt_dbg_core("vgpu%d: %s VGA mode\n", vgpu->id,
+			vga_disable ? "Disable" : "Enable");
+	return 0;
+}
+
+static u32 read_virtual_sbi_register(struct intel_vgpu *vgpu,
+		unsigned int sbi_offset)
+{
+	struct intel_vgpu_display *display = &vgpu->display;
+	int num = display->sbi.number;
+	int i;
+
+	for (i = 0; i < num; ++i)
+		if (display->sbi.registers[i].offset == sbi_offset)
+			break;
+
+	if (i == num)
+		return 0;
+
+	return display->sbi.registers[i].value;
+}
+
+static void write_virtual_sbi_register(struct intel_vgpu *vgpu,
+		unsigned int offset, u32 value)
+{
+	struct intel_vgpu_display *display = &vgpu->display;
+	int num = display->sbi.number;
+	int i;
+
+	for (i = 0; i < num; ++i) {
+		if (display->sbi.registers[i].offset == offset)
+			break;
+	}
+
+	if (i == num) {
+		if (num == SBI_REG_MAX) {
+			gvt_vgpu_err("SBI caching meets maximum limits\n");
+			return;
+		}
+		display->sbi.number++;
+	}
+
+	display->sbi.registers[i].offset = offset;
+	display->sbi.registers[i].value = value;
+}
+
+static int sbi_data_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >>
+				SBI_OPCODE_SHIFT) == SBI_CMD_CRRD) {
+		unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) &
+				SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT;
+		vgpu_vreg(vgpu, offset) = read_virtual_sbi_register(vgpu,
+				sbi_offset);
+	}
+	read_vreg(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
+static int sbi_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 data;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	data = vgpu_vreg(vgpu, offset);
+
+	data &= ~(SBI_STAT_MASK << SBI_STAT_SHIFT);
+	data |= SBI_READY;
+
+	data &= ~(SBI_RESPONSE_MASK << SBI_RESPONSE_SHIFT);
+	data |= SBI_RESPONSE_SUCCESS;
+
+	vgpu_vreg(vgpu, offset) = data;
+
+	if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >>
+				SBI_OPCODE_SHIFT) == SBI_CMD_CRWR) {
+		unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) &
+				SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT;
+
+		write_virtual_sbi_register(vgpu, sbi_offset,
+					   vgpu_vreg_t(vgpu, SBI_DATA));
+	}
+	return 0;
+}
+
+#define _vgtif_reg(x) \
+	(VGT_PVINFO_PAGE + offsetof(struct vgt_if, x))
+
+static int pvinfo_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	bool invalid_read = false;
+
+	read_vreg(vgpu, offset, p_data, bytes);
+
+	switch (offset) {
+	case _vgtif_reg(magic) ... _vgtif_reg(vgt_id):
+		if (offset + bytes > _vgtif_reg(vgt_id) + 4)
+			invalid_read = true;
+		break;
+	case _vgtif_reg(avail_rs.mappable_gmadr.base) ...
+		_vgtif_reg(avail_rs.fence_num):
+		if (offset + bytes >
+			_vgtif_reg(avail_rs.fence_num) + 4)
+			invalid_read = true;
+		break;
+	case 0x78010:	/* vgt_caps */
+	case 0x7881c:
+		break;
+	default:
+		invalid_read = true;
+		break;
+	}
+	if (invalid_read)
+		gvt_vgpu_err("invalid pvinfo read: [%x:%x] = %x\n",
+				offset, bytes, *(u32 *)p_data);
+	vgpu->pv_notified = true;
+	return 0;
+}
+
+static int handle_g2v_notification(struct intel_vgpu *vgpu, int notification)
+{
+	enum intel_gvt_gtt_type root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+	struct intel_vgpu_mm *mm;
+	u64 *pdps;
+
+	pdps = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0]));
+
+	switch (notification) {
+	case VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE:
+		root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
+		fallthrough;
+	case VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE:
+		mm = intel_vgpu_get_ppgtt_mm(vgpu, root_entry_type, pdps);
+		return PTR_ERR_OR_ZERO(mm);
+	case VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY:
+	case VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY:
+		return intel_vgpu_put_ppgtt_mm(vgpu, pdps);
+	case VGT_G2V_EXECLIST_CONTEXT_CREATE:
+	case VGT_G2V_EXECLIST_CONTEXT_DESTROY:
+	case 1:	/* Remove this in guest driver. */
+		break;
+	default:
+		gvt_vgpu_err("Invalid PV notification %d\n", notification);
+	}
+	return 0;
+}
+
+static int send_display_ready_uevent(struct intel_vgpu *vgpu, int ready)
+{
+	struct kobject *kobj = &vgpu->gvt->gt->i915->drm.primary->kdev->kobj;
+	char *env[3] = {NULL, NULL, NULL};
+	char vmid_str[20];
+	char display_ready_str[20];
+
+	snprintf(display_ready_str, 20, "GVT_DISPLAY_READY=%d", ready);
+	env[0] = display_ready_str;
+
+	snprintf(vmid_str, 20, "VMID=%d", vgpu->id);
+	env[1] = vmid_str;
+
+	return kobject_uevent_env(kobj, KOBJ_ADD, env);
+}
+
+static int pvinfo_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 data = *(u32 *)p_data;
+	bool invalid_write = false;
+
+	switch (offset) {
+	case _vgtif_reg(display_ready):
+		send_display_ready_uevent(vgpu, data ? 1 : 0);
+		break;
+	case _vgtif_reg(g2v_notify):
+		handle_g2v_notification(vgpu, data);
+		break;
+	/* add xhot and yhot to handled list to avoid error log */
+	case _vgtif_reg(cursor_x_hot):
+	case _vgtif_reg(cursor_y_hot):
+	case _vgtif_reg(pdp[0].lo):
+	case _vgtif_reg(pdp[0].hi):
+	case _vgtif_reg(pdp[1].lo):
+	case _vgtif_reg(pdp[1].hi):
+	case _vgtif_reg(pdp[2].lo):
+	case _vgtif_reg(pdp[2].hi):
+	case _vgtif_reg(pdp[3].lo):
+	case _vgtif_reg(pdp[3].hi):
+	case _vgtif_reg(execlist_context_descriptor_lo):
+	case _vgtif_reg(execlist_context_descriptor_hi):
+		break;
+	case _vgtif_reg(rsv5[0])..._vgtif_reg(rsv5[3]):
+		invalid_write = true;
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_INSUFFICIENT_RESOURCE);
+		break;
+	default:
+		invalid_write = true;
+		gvt_vgpu_err("invalid pvinfo write offset %x bytes %x data %x\n",
+				offset, bytes, data);
+		break;
+	}
+
+	if (!invalid_write)
+		write_vreg(vgpu, offset, p_data, bytes);
+
+	return 0;
+}
+
+static int pf_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	u32 val = *(u32 *)p_data;
+
+	if ((offset == _PS_1A_CTRL || offset == _PS_2A_CTRL ||
+	   offset == _PS_1B_CTRL || offset == _PS_2B_CTRL ||
+	   offset == _PS_1C_CTRL) && (val & PS_BINDING_MASK) != PS_BINDING_PIPE) {
+		drm_WARN_ONCE(&i915->drm, true,
+			      "VM(%d): guest is trying to scaling a plane\n",
+			      vgpu->id);
+		return 0;
+	}
+
+	return intel_vgpu_default_mmio_write(vgpu, offset, p_data, bytes);
+}
+
+static int power_well_ctl_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (vgpu_vreg(vgpu, offset) &
+	    HSW_PWR_WELL_CTL_REQ(HSW_PW_CTL_IDX_GLOBAL))
+		vgpu_vreg(vgpu, offset) |=
+			HSW_PWR_WELL_CTL_STATE(HSW_PW_CTL_IDX_GLOBAL);
+	else
+		vgpu_vreg(vgpu, offset) &=
+			~HSW_PWR_WELL_CTL_STATE(HSW_PW_CTL_IDX_GLOBAL);
+	return 0;
+}
+
+static int gen9_dbuf_ctl_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (vgpu_vreg(vgpu, offset) & DBUF_POWER_REQUEST)
+		vgpu_vreg(vgpu, offset) |= DBUF_POWER_STATE;
+	else
+		vgpu_vreg(vgpu, offset) &= ~DBUF_POWER_STATE;
+
+	return 0;
+}
+
+static int fpga_dbg_mmio_write(struct intel_vgpu *vgpu,
+	unsigned int offset, void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (vgpu_vreg(vgpu, offset) & FPGA_DBG_RM_NOCLAIM)
+		vgpu_vreg(vgpu, offset) &= ~FPGA_DBG_RM_NOCLAIM;
+	return 0;
+}
+
+static int dma_ctrl_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	u32 mode;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	mode = vgpu_vreg(vgpu, offset);
+
+	if (GFX_MODE_BIT_SET_IN_MASK(mode, START_DMA)) {
+		drm_WARN_ONCE(&i915->drm, 1,
+				"VM(%d): iGVT-g doesn't support GuC\n",
+				vgpu->id);
+		return 0;
+	}
+
+	return 0;
+}
+
+static int gen9_trtte_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	u32 trtte = *(u32 *)p_data;
+
+	if ((trtte & 1) && (trtte & (1 << 1)) == 0) {
+		drm_WARN(&i915->drm, 1,
+				"VM(%d): Use physical address for TRTT!\n",
+				vgpu->id);
+		return -EINVAL;
+	}
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	return 0;
+}
+
+static int gen9_trtt_chicken_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
+static int dpll_status_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 v = 0;
+
+	if (vgpu_vreg(vgpu, 0x46010) & (1 << 31))
+		v |= (1 << 0);
+
+	if (vgpu_vreg(vgpu, 0x46014) & (1 << 31))
+		v |= (1 << 8);
+
+	if (vgpu_vreg(vgpu, 0x46040) & (1 << 31))
+		v |= (1 << 16);
+
+	if (vgpu_vreg(vgpu, 0x46060) & (1 << 31))
+		v |= (1 << 24);
+
+	vgpu_vreg(vgpu, offset) = v;
+
+	return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
+}
+
+static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 value = *(u32 *)p_data;
+	u32 cmd = value & 0xff;
+	u32 *data0 = &vgpu_vreg_t(vgpu, GEN6_PCODE_DATA);
+
+	switch (cmd) {
+	case GEN9_PCODE_READ_MEM_LATENCY:
+		if (IS_SKYLAKE(vgpu->gvt->gt->i915) ||
+		    IS_KABYLAKE(vgpu->gvt->gt->i915) ||
+		    IS_COFFEELAKE(vgpu->gvt->gt->i915) ||
+		    IS_COMETLAKE(vgpu->gvt->gt->i915)) {
+			/**
+			 * "Read memory latency" command on gen9.
+			 * Below memory latency values are read
+			 * from skylake platform.
+			 */
+			if (!*data0)
+				*data0 = 0x1e1a1100;
+			else
+				*data0 = 0x61514b3d;
+		} else if (IS_BROXTON(vgpu->gvt->gt->i915)) {
+			/**
+			 * "Read memory latency" command on gen9.
+			 * Below memory latency values are read
+			 * from Broxton MRB.
+			 */
+			if (!*data0)
+				*data0 = 0x16080707;
+			else
+				*data0 = 0x16161616;
+		}
+		break;
+	case SKL_PCODE_CDCLK_CONTROL:
+		if (IS_SKYLAKE(vgpu->gvt->gt->i915) ||
+		    IS_KABYLAKE(vgpu->gvt->gt->i915) ||
+		    IS_COFFEELAKE(vgpu->gvt->gt->i915) ||
+		    IS_COMETLAKE(vgpu->gvt->gt->i915))
+			*data0 = SKL_CDCLK_READY_FOR_CHANGE;
+		break;
+	case GEN6_PCODE_READ_RC6VIDS:
+		*data0 |= 0x1;
+		break;
+	}
+
+	gvt_dbg_core("VM(%d) write %x to mailbox, return data0 %x\n",
+		     vgpu->id, value, *data0);
+	/**
+	 * PCODE_READY clear means ready for pcode read/write,
+	 * PCODE_ERROR_MASK clear means no error happened. In GVT-g we
+	 * always emulate as pcode read/write success and ready for access
+	 * anytime, since we don't touch real physical registers here.
+	 */
+	value &= ~(GEN6_PCODE_READY | GEN6_PCODE_ERROR_MASK);
+	return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes);
+}
+
+static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 value = *(u32 *)p_data;
+	const struct intel_engine_cs *engine =
+		intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
+
+	if (value != 0 &&
+	    !intel_gvt_ggtt_validate_range(vgpu, value, I915_GTT_PAGE_SIZE)) {
+		gvt_vgpu_err("write invalid HWSP address, reg:0x%x, value:0x%x\n",
+			      offset, value);
+		return -EINVAL;
+	}
+
+	/*
+	 * Need to emulate all the HWSP register write to ensure host can
+	 * update the VM CSB status correctly. Here listed registers can
+	 * support BDW, SKL or other platforms with same HWSP registers.
+	 */
+	if (unlikely(!engine)) {
+		gvt_vgpu_err("access unknown hardware status page register:0x%x\n",
+			     offset);
+		return -EINVAL;
+	}
+	vgpu->hws_pga[engine->id] = value;
+	gvt_dbg_mmio("VM(%d) write: 0x%x to HWSP: 0x%x\n",
+		     vgpu->id, value, offset);
+
+	return intel_vgpu_default_mmio_write(vgpu, offset, &value, bytes);
+}
+
+static int skl_power_well_ctl_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 v = *(u32 *)p_data;
+
+	if (IS_BROXTON(vgpu->gvt->gt->i915))
+		v &= (1 << 31) | (1 << 29);
+	else
+		v &= (1 << 31) | (1 << 29) | (1 << 9) |
+			(1 << 7) | (1 << 5) | (1 << 3) | (1 << 1);
+	v |= (v >> 1);
+
+	return intel_vgpu_default_mmio_write(vgpu, offset, &v, bytes);
+}
+
+static int skl_lcpll_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 v = *(u32 *)p_data;
+
+	/* other bits are MBZ. */
+	v &= (1 << 31) | (1 << 30);
+	v & (1 << 31) ? (v |= (1 << 30)) : (v &= ~(1 << 30));
+
+	vgpu_vreg(vgpu, offset) = v;
+
+	return 0;
+}
+
+static int bxt_de_pll_enable_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 v = *(u32 *)p_data;
+
+	if (v & BXT_DE_PLL_PLL_ENABLE)
+		v |= BXT_DE_PLL_LOCK;
+
+	vgpu_vreg(vgpu, offset) = v;
+
+	return 0;
+}
+
+static int bxt_port_pll_enable_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 v = *(u32 *)p_data;
+
+	if (v & PORT_PLL_ENABLE)
+		v |= PORT_PLL_LOCK;
+
+	vgpu_vreg(vgpu, offset) = v;
+
+	return 0;
+}
+
+static int bxt_phy_ctl_family_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 v = *(u32 *)p_data;
+	u32 data = v & COMMON_RESET_DIS ? BXT_PHY_LANE_ENABLED : 0;
+
+	switch (offset) {
+	case _PHY_CTL_FAMILY_EDP:
+		vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_A) = data;
+		break;
+	case _PHY_CTL_FAMILY_DDI:
+		vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_B) = data;
+		vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_C) = data;
+		break;
+	}
+
+	vgpu_vreg(vgpu, offset) = v;
+
+	return 0;
+}
+
+static int bxt_port_tx_dw3_read(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 v = vgpu_vreg(vgpu, offset);
+
+	v &= ~UNIQUE_TRANGE_EN_METHOD;
+
+	vgpu_vreg(vgpu, offset) = v;
+
+	return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
+}
+
+static int bxt_pcs_dw12_grp_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 v = *(u32 *)p_data;
+
+	if (offset == _PORT_PCS_DW12_GRP_A || offset == _PORT_PCS_DW12_GRP_B) {
+		vgpu_vreg(vgpu, offset - 0x600) = v;
+		vgpu_vreg(vgpu, offset - 0x800) = v;
+	} else {
+		vgpu_vreg(vgpu, offset - 0x400) = v;
+		vgpu_vreg(vgpu, offset - 0x600) = v;
+	}
+
+	vgpu_vreg(vgpu, offset) = v;
+
+	return 0;
+}
+
+static int bxt_gt_disp_pwron_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 v = *(u32 *)p_data;
+
+	if (v & BIT(0)) {
+		vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &=
+			~PHY_RESERVED;
+		vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) |=
+			PHY_POWER_GOOD;
+	}
+
+	if (v & BIT(1)) {
+		vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) &=
+			~PHY_RESERVED;
+		vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) |=
+			PHY_POWER_GOOD;
+	}
+
+
+	vgpu_vreg(vgpu, offset) = v;
+
+	return 0;
+}
+
+static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	vgpu_vreg(vgpu, offset) = 0;
+	return 0;
+}
+
+/*
+ * FixMe:
+ * If guest fills non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did:
+ * 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.)
+ * Due to the missing flush of bb filled by VM vCPU, host GPU hangs on executing
+ * these MI_BATCH_BUFFER.
+ * Temporarily workaround this by setting SNOOP bit for PAT3 used by PPGTT
+ * PML4 PTE: PAT(0) PCD(1) PWT(1).
+ * The performance is still expected to be low, will need further improvement.
+ */
+static int bxt_ppat_low_write(struct intel_vgpu *vgpu, unsigned int offset,
+			      void *p_data, unsigned int bytes)
+{
+	u64 pat =
+		GEN8_PPAT(0, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(1, 0) |
+		GEN8_PPAT(2, 0) |
+		GEN8_PPAT(3, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(4, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(5, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(6, CHV_PPAT_SNOOP) |
+		GEN8_PPAT(7, CHV_PPAT_SNOOP);
+
+	vgpu_vreg(vgpu, offset) = lower_32_bits(pat);
+
+	return 0;
+}
+
+static int guc_status_read(struct intel_vgpu *vgpu,
+			   unsigned int offset, void *p_data,
+			   unsigned int bytes)
+{
+	/* keep MIA_IN_RESET before clearing */
+	read_vreg(vgpu, offset, p_data, bytes);
+	vgpu_vreg(vgpu, offset) &= ~GS_MIA_IN_RESET;
+	return 0;
+}
+
+static int mmio_read_from_hw(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	const struct intel_engine_cs *engine =
+		intel_gvt_render_mmio_to_engine(gvt, offset);
+
+	/**
+	 * Read HW reg in following case
+	 * a. the offset isn't a ring mmio
+	 * b. the offset's ring is running on hw.
+	 * c. the offset is ring time stamp mmio
+	 */
+
+	if (!engine ||
+	    vgpu == gvt->scheduler.engine_owner[engine->id] ||
+	    offset == i915_mmio_reg_offset(RING_TIMESTAMP(engine->mmio_base)) ||
+	    offset == i915_mmio_reg_offset(RING_TIMESTAMP_UDW(engine->mmio_base))) {
+		mmio_hw_access_pre(gvt->gt);
+		vgpu_vreg(vgpu, offset) =
+			intel_uncore_read(gvt->gt->uncore, _MMIO(offset));
+		mmio_hw_access_post(gvt->gt);
+	}
+
+	return intel_vgpu_default_mmio_read(vgpu, offset, p_data, bytes);
+}
+
+static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	const struct intel_engine_cs *engine = intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
+	struct intel_vgpu_execlist *execlist;
+	u32 data = *(u32 *)p_data;
+	int ret = 0;
+
+	if (drm_WARN_ON(&i915->drm, !engine))
+		return -EINVAL;
+
+	/*
+	 * Due to d3_entered is used to indicate skipping PPGTT invalidation on
+	 * vGPU reset, it's set on D0->D3 on PCI config write, and cleared after
+	 * vGPU reset if in resuming.
+	 * In S0ix exit, the device power state also transite from D3 to D0 as
+	 * S3 resume, but no vGPU reset (triggered by QEMU devic model). After
+	 * S0ix exit, all engines continue to work. However the d3_entered
+	 * remains set which will break next vGPU reset logic (miss the expected
+	 * PPGTT invalidation).
+	 * Engines can only work in D0. Thus the 1st elsp write gives GVT a
+	 * chance to clear d3_entered.
+	 */
+	if (vgpu->d3_entered)
+		vgpu->d3_entered = false;
+
+	execlist = &vgpu->submission.execlist[engine->id];
+
+	execlist->elsp_dwords.data[3 - execlist->elsp_dwords.index] = data;
+	if (execlist->elsp_dwords.index == 3) {
+		ret = intel_vgpu_submit_execlist(vgpu, engine);
+		if(ret)
+			gvt_vgpu_err("fail submit workload on ring %s\n",
+				     engine->name);
+	}
+
+	++execlist->elsp_dwords.index;
+	execlist->elsp_dwords.index &= 0x3;
+	return ret;
+}
+
+static int ring_mode_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 data = *(u32 *)p_data;
+	const struct intel_engine_cs *engine =
+		intel_gvt_render_mmio_to_engine(vgpu->gvt, offset);
+	bool enable_execlist;
+	int ret;
+
+	(*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(1);
+	if (IS_COFFEELAKE(vgpu->gvt->gt->i915) ||
+	    IS_COMETLAKE(vgpu->gvt->gt->i915))
+		(*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(2);
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (IS_MASKED_BITS_ENABLED(data, 1)) {
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+		return 0;
+	}
+
+	if ((IS_COFFEELAKE(vgpu->gvt->gt->i915) ||
+	     IS_COMETLAKE(vgpu->gvt->gt->i915)) &&
+	    IS_MASKED_BITS_ENABLED(data, 2)) {
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+		return 0;
+	}
+
+	/* when PPGTT mode enabled, we will check if guest has called
+	 * pvinfo, if not, we will treat this guest as non-gvtg-aware
+	 * guest, and stop emulating its cfg space, mmio, gtt, etc.
+	 */
+	if ((IS_MASKED_BITS_ENABLED(data, GFX_PPGTT_ENABLE) ||
+	    IS_MASKED_BITS_ENABLED(data, GFX_RUN_LIST_ENABLE)) &&
+	    !vgpu->pv_notified) {
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+		return 0;
+	}
+	if (IS_MASKED_BITS_ENABLED(data, GFX_RUN_LIST_ENABLE) ||
+	    IS_MASKED_BITS_DISABLED(data, GFX_RUN_LIST_ENABLE)) {
+		enable_execlist = !!(data & GFX_RUN_LIST_ENABLE);
+
+		gvt_dbg_core("EXECLIST %s on ring %s\n",
+			     (enable_execlist ? "enabling" : "disabling"),
+			     engine->name);
+
+		if (!enable_execlist)
+			return 0;
+
+		ret = intel_vgpu_select_submission_ops(vgpu,
+						       engine->mask,
+						       INTEL_VGPU_EXECLIST_SUBMISSION);
+		if (ret)
+			return ret;
+
+		intel_vgpu_start_schedule(vgpu);
+	}
+	return 0;
+}
+
+static int gvt_reg_tlb_control_handler(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	unsigned int id = 0;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	vgpu_vreg(vgpu, offset) = 0;
+
+	switch (offset) {
+	case 0x4260:
+		id = RCS0;
+		break;
+	case 0x4264:
+		id = VCS0;
+		break;
+	case 0x4268:
+		id = VCS1;
+		break;
+	case 0x426c:
+		id = BCS0;
+		break;
+	case 0x4270:
+		id = VECS0;
+		break;
+	default:
+		return -EINVAL;
+	}
+	set_bit(id, (void *)vgpu->submission.tlb_handle_pending);
+
+	return 0;
+}
+
+static int ring_reset_ctl_write(struct intel_vgpu *vgpu,
+	unsigned int offset, void *p_data, unsigned int bytes)
+{
+	u32 data;
+
+	write_vreg(vgpu, offset, p_data, bytes);
+	data = vgpu_vreg(vgpu, offset);
+
+	if (IS_MASKED_BITS_ENABLED(data, RESET_CTL_REQUEST_RESET))
+		data |= RESET_CTL_READY_TO_RESET;
+	else if (data & _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET))
+		data &= ~RESET_CTL_READY_TO_RESET;
+
+	vgpu_vreg(vgpu, offset) = data;
+	return 0;
+}
+
+static int csfe_chicken1_mmio_write(struct intel_vgpu *vgpu,
+				    unsigned int offset, void *p_data,
+				    unsigned int bytes)
+{
+	u32 data = *(u32 *)p_data;
+
+	(*(u32 *)p_data) &= ~_MASKED_BIT_ENABLE(0x18);
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (IS_MASKED_BITS_ENABLED(data, 0x10) ||
+	    IS_MASKED_BITS_ENABLED(data, 0x8))
+		enter_failsafe_mode(vgpu, GVT_FAILSAFE_UNSUPPORTED_GUEST);
+
+	return 0;
+}
+
+#define MMIO_F(reg, s, f, am, rm, d, r, w) do { \
+	ret = setup_mmio_info(gvt, i915_mmio_reg_offset(reg), \
+		s, f, am, rm, d, r, w); \
+	if (ret) \
+		return ret; \
+} while (0)
+
+#define MMIO_DH(reg, d, r, w) \
+	MMIO_F(reg, 4, 0, 0, 0, d, r, w)
+
+#define MMIO_DFH(reg, d, f, r, w) \
+	MMIO_F(reg, 4, f, 0, 0, d, r, w)
+
+#define MMIO_GM(reg, d, r, w) \
+	MMIO_F(reg, 4, F_GMADR, 0xFFFFF000, 0, d, r, w)
+
+#define MMIO_GM_RDR(reg, d, r, w) \
+	MMIO_F(reg, 4, F_GMADR | F_CMD_ACCESS, 0xFFFFF000, 0, d, r, w)
+
+#define MMIO_RO(reg, d, f, rm, r, w) \
+	MMIO_F(reg, 4, F_RO | f, 0, rm, d, r, w)
+
+#define MMIO_RING_F(prefix, s, f, am, rm, d, r, w) do { \
+	MMIO_F(prefix(RENDER_RING_BASE), s, f, am, rm, d, r, w); \
+	MMIO_F(prefix(BLT_RING_BASE), s, f, am, rm, d, r, w); \
+	MMIO_F(prefix(GEN6_BSD_RING_BASE), s, f, am, rm, d, r, w); \
+	MMIO_F(prefix(VEBOX_RING_BASE), s, f, am, rm, d, r, w); \
+	if (HAS_ENGINE(gvt->gt, VCS1)) \
+		MMIO_F(prefix(GEN8_BSD2_RING_BASE), s, f, am, rm, d, r, w); \
+} while (0)
+
+#define MMIO_RING_DFH(prefix, d, f, r, w) \
+	MMIO_RING_F(prefix, 4, f, 0, 0, d, r, w)
+
+#define MMIO_RING_GM(prefix, d, r, w) \
+	MMIO_RING_F(prefix, 4, F_GMADR, 0xFFFF0000, 0, d, r, w)
+
+#define MMIO_RING_GM_RDR(prefix, d, r, w) \
+	MMIO_RING_F(prefix, 4, F_GMADR | F_CMD_ACCESS, 0xFFFF0000, 0, d, r, w)
+
+#define MMIO_RING_RO(prefix, d, f, rm, r, w) \
+	MMIO_RING_F(prefix, 4, F_RO | f, 0, rm, d, r, w)
+
+static int init_generic_mmio_info(struct intel_gvt *gvt)
+{
+	struct drm_i915_private *dev_priv = gvt->gt->i915;
+	int ret;
+
+	MMIO_RING_DFH(RING_IMR, D_ALL, 0, NULL,
+		intel_vgpu_reg_imr_handler);
+
+	MMIO_DFH(SDEIMR, D_ALL, 0, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_DFH(SDEIER, D_ALL, 0, NULL, intel_vgpu_reg_ier_handler);
+	MMIO_DFH(SDEIIR, D_ALL, 0, NULL, intel_vgpu_reg_iir_handler);
+
+	MMIO_RING_DFH(RING_HWSTAM, D_ALL, 0, NULL, NULL);
+
+
+	MMIO_DH(GEN8_GAMW_ECO_DEV_RW_IA, D_BDW_PLUS, NULL,
+		gamw_echo_dev_rw_ia_write);
+
+	MMIO_GM_RDR(BSD_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+	MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL);
+
+#define RING_REG(base) _MMIO((base) + 0x28)
+	MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL);
+#undef RING_REG
+
+#define RING_REG(base) _MMIO((base) + 0x134)
+	MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL);
+#undef RING_REG
+
+#define RING_REG(base) _MMIO((base) + 0x6c)
+	MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL);
+#undef RING_REG
+	MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL);
+
+	MMIO_GM_RDR(_MMIO(0x2148), D_ALL, NULL, NULL);
+	MMIO_GM_RDR(CCID(RENDER_RING_BASE), D_ALL, NULL, NULL);
+	MMIO_GM_RDR(_MMIO(0x12198), D_ALL, NULL, NULL);
+
+	MMIO_RING_DFH(RING_TAIL, D_ALL, 0, NULL, NULL);
+	MMIO_RING_DFH(RING_HEAD, D_ALL, 0, NULL, NULL);
+	MMIO_RING_DFH(RING_CTL, D_ALL, 0, NULL, NULL);
+	MMIO_RING_DFH(RING_ACTHD, D_ALL, 0, mmio_read_from_hw, NULL);
+	MMIO_RING_GM(RING_START, D_ALL, NULL, NULL);
+
+	/* RING MODE */
+#define RING_REG(base) _MMIO((base) + 0x29c)
+	MMIO_RING_DFH(RING_REG, D_ALL,
+		F_MODE_MASK | F_CMD_ACCESS | F_CMD_WRITE_PATCH, NULL,
+		ring_mode_mmio_write);
+#undef RING_REG
+
+	MMIO_RING_DFH(RING_MI_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_RING_DFH(RING_INSTPM, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+			NULL, NULL);
+	MMIO_RING_DFH(RING_TIMESTAMP, D_ALL, F_CMD_ACCESS,
+			mmio_read_from_hw, NULL);
+	MMIO_RING_DFH(RING_TIMESTAMP_UDW, D_ALL, F_CMD_ACCESS,
+			mmio_read_from_hw, NULL);
+
+	MMIO_DFH(GEN7_GT_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(CACHE_MODE_0_GEN7, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2124), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(_MMIO(0x20dc), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2088), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(FF_SLICE_CS_CHICKEN2, D_ALL,
+		 F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2470), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS,
+		 NULL, NULL);
+	MMIO_DFH(_MMIO(0x9030), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x20a0), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2420), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2430), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2434), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(HSW_HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+
+	/* display */
+	MMIO_DH(TRANSCONF(TRANSCODER_A), D_ALL, NULL, pipeconf_mmio_write);
+	MMIO_DH(TRANSCONF(TRANSCODER_B), D_ALL, NULL, pipeconf_mmio_write);
+	MMIO_DH(TRANSCONF(TRANSCODER_C), D_ALL, NULL, pipeconf_mmio_write);
+	MMIO_DH(TRANSCONF(TRANSCODER_EDP), D_ALL, NULL, pipeconf_mmio_write);
+	MMIO_DH(DSPSURF(PIPE_A), D_ALL, NULL, pri_surf_mmio_write);
+	MMIO_DH(REG_50080(PIPE_A, PLANE_PRIMARY), D_ALL, NULL,
+		reg50080_mmio_write);
+	MMIO_DH(DSPSURF(PIPE_B), D_ALL, NULL, pri_surf_mmio_write);
+	MMIO_DH(REG_50080(PIPE_B, PLANE_PRIMARY), D_ALL, NULL,
+		reg50080_mmio_write);
+	MMIO_DH(DSPSURF(PIPE_C), D_ALL, NULL, pri_surf_mmio_write);
+	MMIO_DH(REG_50080(PIPE_C, PLANE_PRIMARY), D_ALL, NULL,
+		reg50080_mmio_write);
+	MMIO_DH(SPRSURF(PIPE_A), D_ALL, NULL, spr_surf_mmio_write);
+	MMIO_DH(REG_50080(PIPE_A, PLANE_SPRITE0), D_ALL, NULL,
+		reg50080_mmio_write);
+	MMIO_DH(SPRSURF(PIPE_B), D_ALL, NULL, spr_surf_mmio_write);
+	MMIO_DH(REG_50080(PIPE_B, PLANE_SPRITE0), D_ALL, NULL,
+		reg50080_mmio_write);
+	MMIO_DH(SPRSURF(PIPE_C), D_ALL, NULL, spr_surf_mmio_write);
+	MMIO_DH(REG_50080(PIPE_C, PLANE_SPRITE0), D_ALL, NULL,
+		reg50080_mmio_write);
+
+	MMIO_F(PCH_GMBUS0, 4 * 4, 0, 0, 0, D_ALL, gmbus_mmio_read,
+		gmbus_mmio_write);
+	MMIO_F(PCH_GPIO_BASE, 6 * 4, F_UNALIGN, 0, 0, D_ALL, NULL, NULL);
+
+	MMIO_F(_MMIO(_PCH_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
+		dp_aux_ch_ctl_mmio_write);
+	MMIO_F(_MMIO(_PCH_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
+		dp_aux_ch_ctl_mmio_write);
+	MMIO_F(_MMIO(_PCH_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL,
+		dp_aux_ch_ctl_mmio_write);
+
+	MMIO_DH(PCH_ADPA, D_PRE_SKL, NULL, pch_adpa_mmio_write);
+
+	MMIO_DH(_MMIO(_PCH_TRANSACONF), D_ALL, NULL, transconf_mmio_write);
+	MMIO_DH(_MMIO(_PCH_TRANSBCONF), D_ALL, NULL, transconf_mmio_write);
+
+	MMIO_DH(FDI_RX_IIR(PIPE_A), D_ALL, NULL, fdi_rx_iir_mmio_write);
+	MMIO_DH(FDI_RX_IIR(PIPE_B), D_ALL, NULL, fdi_rx_iir_mmio_write);
+	MMIO_DH(FDI_RX_IIR(PIPE_C), D_ALL, NULL, fdi_rx_iir_mmio_write);
+	MMIO_DH(FDI_RX_IMR(PIPE_A), D_ALL, NULL, update_fdi_rx_iir_status);
+	MMIO_DH(FDI_RX_IMR(PIPE_B), D_ALL, NULL, update_fdi_rx_iir_status);
+	MMIO_DH(FDI_RX_IMR(PIPE_C), D_ALL, NULL, update_fdi_rx_iir_status);
+	MMIO_DH(FDI_RX_CTL(PIPE_A), D_ALL, NULL, update_fdi_rx_iir_status);
+	MMIO_DH(FDI_RX_CTL(PIPE_B), D_ALL, NULL, update_fdi_rx_iir_status);
+	MMIO_DH(FDI_RX_CTL(PIPE_C), D_ALL, NULL, update_fdi_rx_iir_status);
+	MMIO_DH(PCH_PP_CONTROL, D_ALL, NULL, pch_pp_control_mmio_write);
+	MMIO_DH(_MMIO(0xe651c), D_ALL, dpy_reg_mmio_read, NULL);
+	MMIO_DH(_MMIO(0xe661c), D_ALL, dpy_reg_mmio_read, NULL);
+	MMIO_DH(_MMIO(0xe671c), D_ALL, dpy_reg_mmio_read, NULL);
+	MMIO_DH(_MMIO(0xe681c), D_ALL, dpy_reg_mmio_read, NULL);
+	MMIO_DH(_MMIO(0xe6c04), D_ALL, dpy_reg_mmio_read, NULL);
+	MMIO_DH(_MMIO(0xe6e1c), D_ALL, dpy_reg_mmio_read, NULL);
+
+	MMIO_RO(PCH_PORT_HOTPLUG, D_ALL, 0,
+		PORTA_HOTPLUG_STATUS_MASK
+		| PORTB_HOTPLUG_STATUS_MASK
+		| PORTC_HOTPLUG_STATUS_MASK
+		| PORTD_HOTPLUG_STATUS_MASK,
+		NULL, NULL);
+
+	MMIO_DH(LCPLL_CTL, D_ALL, NULL, lcpll_ctl_mmio_write);
+	MMIO_DH(SOUTH_CHICKEN2, D_ALL, NULL, south_chicken2_mmio_write);
+	MMIO_DH(SFUSE_STRAP, D_ALL, NULL, NULL);
+	MMIO_DH(SBI_DATA, D_ALL, sbi_data_mmio_read, NULL);
+	MMIO_DH(SBI_CTL_STAT, D_ALL, NULL, sbi_ctl_mmio_write);
+
+	MMIO_F(_MMIO(_DPA_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_ALL, NULL,
+		dp_aux_ch_ctl_mmio_write);
+
+	MMIO_DH(DDI_BUF_CTL(PORT_A), D_ALL, NULL, ddi_buf_ctl_mmio_write);
+	MMIO_DH(DDI_BUF_CTL(PORT_B), D_ALL, NULL, ddi_buf_ctl_mmio_write);
+	MMIO_DH(DDI_BUF_CTL(PORT_C), D_ALL, NULL, ddi_buf_ctl_mmio_write);
+	MMIO_DH(DDI_BUF_CTL(PORT_D), D_ALL, NULL, ddi_buf_ctl_mmio_write);
+	MMIO_DH(DDI_BUF_CTL(PORT_E), D_ALL, NULL, ddi_buf_ctl_mmio_write);
+
+	MMIO_DH(DP_TP_CTL(PORT_A), D_ALL, NULL, dp_tp_ctl_mmio_write);
+	MMIO_DH(DP_TP_CTL(PORT_B), D_ALL, NULL, dp_tp_ctl_mmio_write);
+	MMIO_DH(DP_TP_CTL(PORT_C), D_ALL, NULL, dp_tp_ctl_mmio_write);
+	MMIO_DH(DP_TP_CTL(PORT_D), D_ALL, NULL, dp_tp_ctl_mmio_write);
+	MMIO_DH(DP_TP_CTL(PORT_E), D_ALL, NULL, dp_tp_ctl_mmio_write);
+
+	MMIO_DH(DP_TP_STATUS(PORT_A), D_ALL, NULL, dp_tp_status_mmio_write);
+	MMIO_DH(DP_TP_STATUS(PORT_B), D_ALL, NULL, dp_tp_status_mmio_write);
+	MMIO_DH(DP_TP_STATUS(PORT_C), D_ALL, NULL, dp_tp_status_mmio_write);
+	MMIO_DH(DP_TP_STATUS(PORT_D), D_ALL, NULL, dp_tp_status_mmio_write);
+	MMIO_DH(DP_TP_STATUS(PORT_E), D_ALL, NULL, NULL);
+
+	MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL);
+	MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL);
+	MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_C), D_ALL, NULL, NULL);
+	MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_EDP), D_ALL, NULL, NULL);
+
+	MMIO_DH(FORCEWAKE, D_ALL, NULL, NULL);
+	MMIO_DFH(GTFIFODBG, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GTFIFOCTL, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DH(FORCEWAKE_MT, D_PRE_SKL, NULL, mul_force_wake_write);
+	MMIO_DH(FORCEWAKE_ACK_HSW, D_BDW, NULL, NULL);
+	MMIO_DH(GEN6_RC_CONTROL, D_ALL, NULL, NULL);
+	MMIO_DH(GEN6_RC_STATE, D_ALL, NULL, NULL);
+	MMIO_DH(HSW_PWR_WELL_CTL1, D_BDW, NULL, power_well_ctl_mmio_write);
+	MMIO_DH(HSW_PWR_WELL_CTL2, D_BDW, NULL, power_well_ctl_mmio_write);
+	MMIO_DH(HSW_PWR_WELL_CTL3, D_BDW, NULL, power_well_ctl_mmio_write);
+	MMIO_DH(HSW_PWR_WELL_CTL4, D_BDW, NULL, power_well_ctl_mmio_write);
+	MMIO_DH(HSW_PWR_WELL_CTL5, D_BDW, NULL, power_well_ctl_mmio_write);
+	MMIO_DH(HSW_PWR_WELL_CTL6, D_BDW, NULL, power_well_ctl_mmio_write);
+
+	MMIO_DH(GEN6_GDRST, D_ALL, NULL, gdrst_mmio_write);
+	MMIO_F(FENCE_REG_GEN6_LO(0), 0x80, 0, 0, 0, D_ALL, fence_mmio_read, fence_mmio_write);
+	MMIO_DH(CPU_VGACNTRL, D_ALL, NULL, vga_control_mmio_write);
+
+	MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL);
+	MMIO_DH(GFX_FLSH_CNTL_GEN6, D_ALL, NULL, NULL);
+
+	MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write);
+	MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DH(FPGA_DBG, D_ALL, NULL, fpga_dbg_mmio_write);
+	MMIO_DFH(_MMIO(0x215c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2178), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x217c), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x12178), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x1217c), D_ALL, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_F(_MMIO(0x2290), 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL);
+	MMIO_F(_MMIO(0x5200), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(_MMIO(0x5240), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(_MMIO(0x5280), 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+
+	MMIO_DFH(_MMIO(0x1c17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x1c178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(DS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(IA_VERTICES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(IA_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(VS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(GS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(GS_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(CL_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL);
+	MMIO_DH(_MMIO(0x4260), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+	MMIO_DH(_MMIO(0x4264), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+	MMIO_DH(_MMIO(0x4268), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+	MMIO_DH(_MMIO(0x426c), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+	MMIO_DH(_MMIO(0x4270), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler);
+	MMIO_DFH(_MMIO(0x4094), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_GM(RING_BBADDR, D_ALL, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2220), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x12220), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x22220), D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x22178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x1a178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x1a17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2217c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
+	MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
+	MMIO_DH(GUC_STATUS, D_ALL, guc_status_read, NULL);
+
+	return 0;
+}
+
+static int init_bdw_mmio_info(struct intel_gvt *gvt)
+{
+	int ret;
+
+	MMIO_DH(GEN8_GT_IMR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_GT_IER(0), D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_GT_IIR(0), D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_GT_IMR(1), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_GT_IER(1), D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_GT_IIR(1), D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_GT_IMR(2), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_GT_IER(2), D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_GT_IIR(2), D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_GT_IMR(3), D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_GT_IER(3), D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_GT_IIR(3), D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_DE_PIPE_IMR(PIPE_A), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_DE_PIPE_IER(PIPE_A), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_DE_PIPE_IIR(PIPE_A), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_DE_PIPE_IMR(PIPE_B), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_DE_PIPE_IER(PIPE_B), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_DE_PIPE_IIR(PIPE_B), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_DE_PIPE_IMR(PIPE_C), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_DE_PIPE_IER(PIPE_C), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_DE_PIPE_IIR(PIPE_C), D_BDW_PLUS, NULL,
+		intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_DE_PORT_IMR, D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_DE_PORT_IER, D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_DE_PORT_IIR, D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_DE_MISC_IMR, D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_DE_MISC_IER, D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_DE_MISC_IIR, D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_PCU_IMR, D_BDW_PLUS, NULL, intel_vgpu_reg_imr_handler);
+	MMIO_DH(GEN8_PCU_IER, D_BDW_PLUS, NULL, intel_vgpu_reg_ier_handler);
+	MMIO_DH(GEN8_PCU_IIR, D_BDW_PLUS, NULL, intel_vgpu_reg_iir_handler);
+
+	MMIO_DH(GEN8_MASTER_IRQ, D_BDW_PLUS, NULL,
+		intel_vgpu_reg_master_irq_handler);
+
+	MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, 0,
+		mmio_read_from_hw, NULL);
+
+#define RING_REG(base) _MMIO((base) + 0xd0)
+	MMIO_RING_F(RING_REG, 4, F_RO, 0,
+		~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL,
+		ring_reset_ctl_write);
+#undef RING_REG
+
+#define RING_REG(base) _MMIO((base) + 0x230)
+	MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write);
+#undef RING_REG
+
+#define RING_REG(base) _MMIO((base) + 0x234)
+	MMIO_RING_F(RING_REG, 8, F_RO, 0, ~0, D_BDW_PLUS,
+		NULL, NULL);
+#undef RING_REG
+
+#define RING_REG(base) _MMIO((base) + 0x244)
+	MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+#undef RING_REG
+
+#define RING_REG(base) _MMIO((base) + 0x370)
+	MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL);
+#undef RING_REG
+
+#define RING_REG(base) _MMIO((base) + 0x3a0)
+	MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL);
+#undef RING_REG
+
+	MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write);
+
+#define RING_REG(base) _MMIO((base) + 0x270)
+	MMIO_RING_F(RING_REG, 32, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL);
+#undef RING_REG
+
+	MMIO_RING_GM(RING_HWS_PGA, D_BDW_PLUS, NULL, hws_pga_write);
+
+	MMIO_DFH(HDC_CHICKEN0, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
+	MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(_MMIO(0xb1f0), D_BDW, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xb1c0), D_BDW, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xb100), D_BDW, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS | F_CMD_WRITE_PATCH, 0, 0,
+		D_BDW_PLUS, NULL, force_nonpriv_write);
+
+	MMIO_DFH(_MMIO(0x83a4), D_BDW, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(_MMIO(0x8430), D_BDW, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(_MMIO(0xe194), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe188), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x2580), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(_MMIO(0x2248), D_BDW, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DFH(_MMIO(0xe220), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe230), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe240), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe260), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe270), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe280), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe2a0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x21f0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	return 0;
+}
+
+static int init_skl_mmio_info(struct intel_gvt *gvt)
+{
+	struct drm_i915_private *dev_priv = gvt->gt->i915;
+	int ret;
+
+	MMIO_DH(FORCEWAKE_RENDER_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write);
+	MMIO_DH(FORCEWAKE_ACK_RENDER_GEN9, D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(FORCEWAKE_GT_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write);
+	MMIO_DH(FORCEWAKE_ACK_GT_GEN9, D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write);
+	MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL);
+
+	MMIO_F(DP_AUX_CH_CTL(AUX_CH_B), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);
+	MMIO_F(DP_AUX_CH_CTL(AUX_CH_C), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);
+	MMIO_F(DP_AUX_CH_CTL(AUX_CH_D), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);
+
+	MMIO_DH(HSW_PWR_WELL_CTL2, D_SKL_PLUS, NULL, skl_power_well_ctl_write);
+
+	MMIO_DH(DBUF_CTL_S(0), D_SKL_PLUS, NULL, gen9_dbuf_ctl_mmio_write);
+
+	MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(MMCD_MISC_CTRL, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DH(CHICKEN_PAR1_1, D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(LCPLL1_CTL, D_SKL_PLUS, NULL, skl_lcpll_write);
+	MMIO_DH(LCPLL2_CTL, D_SKL_PLUS, NULL, skl_lcpll_write);
+	MMIO_DH(DPLL_STATUS, D_SKL_PLUS, dpll_status_read, NULL);
+
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);
+
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);
+
+	MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);
+
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DFH(BDW_SCRATCH1, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_F(GEN9_GFX_MOCS(0), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL_PLUS,
+		NULL, NULL);
+	MMIO_F(GEN7_L3CNTLREG2, 0x80, F_CMD_ACCESS, 0, 0, D_SKL_PLUS,
+		NULL, NULL);
+
+	MMIO_DFH(GEN7_FF_SLICE_CS_CHICKEN1, D_SKL_PLUS,
+		 F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN9_CS_DEBUG_MODE1, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		NULL, NULL);
+
+	/* TRTT */
+	MMIO_DFH(TRVATTL3PTRDW(0), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(TRVATTL3PTRDW(1), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(TRVATTL3PTRDW(2), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(TRVATTL3PTRDW(3), D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(TRVADR, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(TRTTE, D_SKL_PLUS, F_CMD_ACCESS | F_PM_SAVE,
+		 NULL, gen9_trtte_write);
+	MMIO_DFH(_MMIO(0x4dfc), D_SKL_PLUS, F_PM_SAVE,
+		 NULL, gen9_trtt_chicken_write);
+
+	MMIO_DFH(GEN8_GARBCNTL, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write);
+
+#define CSFE_CHICKEN1_REG(base) _MMIO((base) + 0xD4)
+	MMIO_RING_DFH(CSFE_CHICKEN1_REG, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		      NULL, csfe_chicken1_mmio_write);
+#undef CSFE_CHICKEN1_REG
+	MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		 NULL, NULL);
+	MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		 NULL, NULL);
+
+	MMIO_DFH(GAMT_CHKN_BIT_REG, D_KBL | D_CFL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0xe4cc), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+
+	return 0;
+}
+
+static int init_bxt_mmio_info(struct intel_gvt *gvt)
+{
+	int ret;
+
+	MMIO_DH(BXT_P_CR_GT_DISP_PWRON, D_BXT, NULL, bxt_gt_disp_pwron_write);
+	MMIO_DH(BXT_PHY_CTL_FAMILY(DPIO_PHY0), D_BXT,
+		NULL, bxt_phy_ctl_family_write);
+	MMIO_DH(BXT_PHY_CTL_FAMILY(DPIO_PHY1), D_BXT,
+		NULL, bxt_phy_ctl_family_write);
+	MMIO_DH(BXT_PORT_PLL_ENABLE(PORT_A), D_BXT,
+		NULL, bxt_port_pll_enable_write);
+	MMIO_DH(BXT_PORT_PLL_ENABLE(PORT_B), D_BXT,
+		NULL, bxt_port_pll_enable_write);
+	MMIO_DH(BXT_PORT_PLL_ENABLE(PORT_C), D_BXT, NULL,
+		bxt_port_pll_enable_write);
+
+	MMIO_DH(BXT_PORT_PCS_DW12_GRP(DPIO_PHY0, DPIO_CH0), D_BXT,
+		NULL, bxt_pcs_dw12_grp_write);
+	MMIO_DH(BXT_PORT_TX_DW3_LN0(DPIO_PHY0, DPIO_CH0), D_BXT,
+		bxt_port_tx_dw3_read, NULL);
+	MMIO_DH(BXT_PORT_PCS_DW12_GRP(DPIO_PHY0, DPIO_CH1), D_BXT,
+		NULL, bxt_pcs_dw12_grp_write);
+	MMIO_DH(BXT_PORT_TX_DW3_LN0(DPIO_PHY0, DPIO_CH1), D_BXT,
+		bxt_port_tx_dw3_read, NULL);
+	MMIO_DH(BXT_PORT_PCS_DW12_GRP(DPIO_PHY1, DPIO_CH0), D_BXT,
+		NULL, bxt_pcs_dw12_grp_write);
+	MMIO_DH(BXT_PORT_TX_DW3_LN0(DPIO_PHY1, DPIO_CH0), D_BXT,
+		bxt_port_tx_dw3_read, NULL);
+	MMIO_DH(BXT_DE_PLL_ENABLE, D_BXT, NULL, bxt_de_pll_enable_write);
+	MMIO_DFH(GEN8_L3SQCREG1, D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(GEN8_L3CNTLREG, D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x20D8), D_BXT, F_CMD_ACCESS, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(RENDER_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(GEN6_BSD_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(BLT_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+	MMIO_F(GEN8_RING_CS_GPR(VEBOX_RING_BASE, 0), 0x40, F_CMD_ACCESS,
+	       0, 0, D_BXT, NULL, NULL);
+
+	MMIO_DFH(GEN9_CTX_PREEMPT_REG, D_BXT, F_CMD_ACCESS, NULL, NULL);
+
+	MMIO_DH(GEN8_PRIVATE_PAT_LO, D_BXT, NULL, bxt_ppat_low_write);
+
+	return 0;
+}
+
+static struct gvt_mmio_block *find_mmio_block(struct intel_gvt *gvt,
+					      unsigned int offset)
+{
+	struct gvt_mmio_block *block = gvt->mmio.mmio_block;
+	int num = gvt->mmio.num_mmio_block;
+	int i;
+
+	for (i = 0; i < num; i++, block++) {
+		if (offset >= i915_mmio_reg_offset(block->offset) &&
+		    offset < i915_mmio_reg_offset(block->offset) + block->size)
+			return block;
+	}
+	return NULL;
+}
+
+/**
+ * intel_gvt_clean_mmio_info - clean up MMIO information table for GVT device
+ * @gvt: GVT device
+ *
+ * This function is called at the driver unloading stage, to clean up the MMIO
+ * information table of GVT device
+ *
+ */
+void intel_gvt_clean_mmio_info(struct intel_gvt *gvt)
+{
+	struct hlist_node *tmp;
+	struct intel_gvt_mmio_info *e;
+	int i;
+
+	hash_for_each_safe(gvt->mmio.mmio_info_table, i, tmp, e, node)
+		kfree(e);
+
+	kfree(gvt->mmio.mmio_block);
+	gvt->mmio.mmio_block = NULL;
+	gvt->mmio.num_mmio_block = 0;
+
+	vfree(gvt->mmio.mmio_attribute);
+	gvt->mmio.mmio_attribute = NULL;
+}
+
+static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset,
+		       u32 size)
+{
+	struct intel_gvt *gvt = iter->data;
+	struct intel_gvt_mmio_info *info, *p;
+	u32 start, end, i;
+
+	if (WARN_ON(!IS_ALIGNED(offset, 4)))
+		return -EINVAL;
+
+	start = offset;
+	end = offset + size;
+
+	for (i = start; i < end; i += 4) {
+		p = intel_gvt_find_mmio_info(gvt, i);
+		if (p) {
+			WARN(1, "dup mmio definition offset %x\n",
+				info->offset);
+
+			/* We return -EEXIST here to make GVT-g load fail.
+			 * So duplicated MMIO can be found as soon as
+			 * possible.
+			 */
+			return -EEXIST;
+		}
+
+		info = kzalloc(sizeof(*info), GFP_KERNEL);
+		if (!info)
+			return -ENOMEM;
+
+		info->offset = i;
+		info->read = intel_vgpu_default_mmio_read;
+		info->write = intel_vgpu_default_mmio_write;
+		INIT_HLIST_NODE(&info->node);
+		hash_add(gvt->mmio.mmio_info_table, &info->node, info->offset);
+		gvt->mmio.num_tracked_mmio++;
+	}
+	return 0;
+}
+
+static int handle_mmio_block(struct intel_gvt_mmio_table_iter *iter,
+			     u32 offset, u32 size)
+{
+	struct intel_gvt *gvt = iter->data;
+	struct gvt_mmio_block *block = gvt->mmio.mmio_block;
+	void *ret;
+
+	ret = krealloc(block,
+			 (gvt->mmio.num_mmio_block + 1) * sizeof(*block),
+			 GFP_KERNEL);
+	if (!ret)
+		return -ENOMEM;
+
+	gvt->mmio.mmio_block = block = ret;
+
+	block += gvt->mmio.num_mmio_block;
+
+	memset(block, 0, sizeof(*block));
+
+	block->offset = _MMIO(offset);
+	block->size = size;
+
+	gvt->mmio.num_mmio_block++;
+
+	return 0;
+}
+
+static int handle_mmio_cb(struct intel_gvt_mmio_table_iter *iter, u32 offset,
+			  u32 size)
+{
+	if (size < 1024 || offset == i915_mmio_reg_offset(GEN9_GFX_MOCS(0)))
+		return handle_mmio(iter, offset, size);
+	else
+		return handle_mmio_block(iter, offset, size);
+}
+
+static int init_mmio_info(struct intel_gvt *gvt)
+{
+	struct intel_gvt_mmio_table_iter iter = {
+		.i915 = gvt->gt->i915,
+		.data = gvt,
+		.handle_mmio_cb = handle_mmio_cb,
+	};
+
+	return intel_gvt_iterate_mmio_table(&iter);
+}
+
+static int init_mmio_block_handlers(struct intel_gvt *gvt)
+{
+	struct gvt_mmio_block *block;
+
+	block = find_mmio_block(gvt, VGT_PVINFO_PAGE);
+	if (!block) {
+		WARN(1, "fail to assign handlers to mmio block %x\n",
+		     i915_mmio_reg_offset(gvt->mmio.mmio_block->offset));
+		return -ENODEV;
+	}
+
+	block->read = pvinfo_mmio_read;
+	block->write = pvinfo_mmio_write;
+
+	return 0;
+}
+
+/**
+ * intel_gvt_setup_mmio_info - setup MMIO information table for GVT device
+ * @gvt: GVT device
+ *
+ * This function is called at the initialization stage, to setup the MMIO
+ * information table for GVT device
+ *
+ * Returns:
+ * zero on success, negative if failed.
+ */
+int intel_gvt_setup_mmio_info(struct intel_gvt *gvt)
+{
+	struct intel_gvt_device_info *info = &gvt->device_info;
+	struct drm_i915_private *i915 = gvt->gt->i915;
+	int size = info->mmio_size / 4 * sizeof(*gvt->mmio.mmio_attribute);
+	int ret;
+
+	gvt->mmio.mmio_attribute = vzalloc(size);
+	if (!gvt->mmio.mmio_attribute)
+		return -ENOMEM;
+
+	ret = init_mmio_info(gvt);
+	if (ret)
+		goto err;
+
+	ret = init_mmio_block_handlers(gvt);
+	if (ret)
+		goto err;
+
+	ret = init_generic_mmio_info(gvt);
+	if (ret)
+		goto err;
+
+	if (IS_BROADWELL(i915)) {
+		ret = init_bdw_mmio_info(gvt);
+		if (ret)
+			goto err;
+	} else if (IS_SKYLAKE(i915) ||
+		   IS_KABYLAKE(i915) ||
+		   IS_COFFEELAKE(i915) ||
+		   IS_COMETLAKE(i915)) {
+		ret = init_bdw_mmio_info(gvt);
+		if (ret)
+			goto err;
+		ret = init_skl_mmio_info(gvt);
+		if (ret)
+			goto err;
+	} else if (IS_BROXTON(i915)) {
+		ret = init_bdw_mmio_info(gvt);
+		if (ret)
+			goto err;
+		ret = init_skl_mmio_info(gvt);
+		if (ret)
+			goto err;
+		ret = init_bxt_mmio_info(gvt);
+		if (ret)
+			goto err;
+	}
+
+	return 0;
+err:
+	intel_gvt_clean_mmio_info(gvt);
+	return ret;
+}
+
+/**
+ * intel_gvt_for_each_tracked_mmio - iterate each tracked mmio
+ * @gvt: a GVT device
+ * @handler: the handler
+ * @data: private data given to handler
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
+	int (*handler)(struct intel_gvt *gvt, u32 offset, void *data),
+	void *data)
+{
+	struct gvt_mmio_block *block = gvt->mmio.mmio_block;
+	struct intel_gvt_mmio_info *e;
+	int i, j, ret;
+
+	hash_for_each(gvt->mmio.mmio_info_table, i, e, node) {
+		ret = handler(gvt, e->offset, data);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) {
+		/* pvinfo data doesn't come from hw mmio */
+		if (i915_mmio_reg_offset(block->offset) == VGT_PVINFO_PAGE)
+			continue;
+
+		for (j = 0; j < block->size; j += 4) {
+			ret = handler(gvt, i915_mmio_reg_offset(block->offset) + j, data);
+			if (ret)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+/**
+ * intel_vgpu_default_mmio_read - default MMIO read handler
+ * @vgpu: a vGPU
+ * @offset: access offset
+ * @p_data: data return buffer
+ * @bytes: access data length
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	read_vreg(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
+/**
+ * intel_vgpu_default_mmio_write() - default MMIO write handler
+ * @vgpu: a vGPU
+ * @offset: access offset
+ * @p_data: write data buffer
+ * @bytes: access data length
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
+/**
+ * intel_vgpu_mask_mmio_write - write mask register
+ * @vgpu: a vGPU
+ * @offset: access offset
+ * @p_data: write data buffer
+ * @bytes: access data length
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_mask_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	u32 mask, old_vreg;
+
+	old_vreg = vgpu_vreg(vgpu, offset);
+	write_vreg(vgpu, offset, p_data, bytes);
+	mask = vgpu_vreg(vgpu, offset) >> 16;
+	vgpu_vreg(vgpu, offset) = (old_vreg & ~mask) |
+				(vgpu_vreg(vgpu, offset) & mask);
+
+	return 0;
+}
+
+/**
+ * intel_gvt_in_force_nonpriv_whitelist - if a mmio is in whitelist to be
+ * force-nopriv register
+ *
+ * @gvt: a GVT device
+ * @offset: register offset
+ *
+ * Returns:
+ * True if the register is in force-nonpriv whitelist;
+ * False if outside;
+ */
+bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt,
+					  unsigned int offset)
+{
+	return in_whitelist(offset);
+}
+
+/**
+ * intel_vgpu_mmio_reg_rw - emulate tracked mmio registers
+ * @vgpu: a vGPU
+ * @offset: register offset
+ * @pdata: data buffer
+ * @bytes: data length
+ * @is_read: read or write
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset,
+			   void *pdata, unsigned int bytes, bool is_read)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gvt_mmio_info *mmio_info;
+	struct gvt_mmio_block *mmio_block;
+	gvt_mmio_func func;
+	int ret;
+
+	if (drm_WARN_ON(&i915->drm, bytes > 8))
+		return -EINVAL;
+
+	/*
+	 * Handle special MMIO blocks.
+	 */
+	mmio_block = find_mmio_block(gvt, offset);
+	if (mmio_block) {
+		func = is_read ? mmio_block->read : mmio_block->write;
+		if (func)
+			return func(vgpu, offset, pdata, bytes);
+		goto default_rw;
+	}
+
+	/*
+	 * Normal tracked MMIOs.
+	 */
+	mmio_info = intel_gvt_find_mmio_info(gvt, offset);
+	if (!mmio_info) {
+		gvt_dbg_mmio("untracked MMIO %08x len %d\n", offset, bytes);
+		goto default_rw;
+	}
+
+	if (is_read)
+		return mmio_info->read(vgpu, offset, pdata, bytes);
+	else {
+		u64 ro_mask = mmio_info->ro_mask;
+		u32 old_vreg = 0;
+		u64 data = 0;
+
+		if (intel_gvt_mmio_has_mode_mask(gvt, mmio_info->offset)) {
+			old_vreg = vgpu_vreg(vgpu, offset);
+		}
+
+		if (likely(!ro_mask))
+			ret = mmio_info->write(vgpu, offset, pdata, bytes);
+		else if (!~ro_mask) {
+			gvt_vgpu_err("try to write RO reg %x\n", offset);
+			return 0;
+		} else {
+			/* keep the RO bits in the virtual register */
+			memcpy(&data, pdata, bytes);
+			data &= ~ro_mask;
+			data |= vgpu_vreg(vgpu, offset) & ro_mask;
+			ret = mmio_info->write(vgpu, offset, &data, bytes);
+		}
+
+		/* higher 16bits of mode ctl regs are mask bits for change */
+		if (intel_gvt_mmio_has_mode_mask(gvt, mmio_info->offset)) {
+			u32 mask = vgpu_vreg(vgpu, offset) >> 16;
+
+			vgpu_vreg(vgpu, offset) = (old_vreg & ~mask)
+					| (vgpu_vreg(vgpu, offset) & mask);
+		}
+	}
+
+	return ret;
+
+default_rw:
+	return is_read ?
+		intel_vgpu_default_mmio_read(vgpu, offset, pdata, bytes) :
+		intel_vgpu_default_mmio_write(vgpu, offset, pdata, bytes);
+}
+
+void intel_gvt_restore_fence(struct intel_gvt *gvt)
+{
+	struct intel_vgpu *vgpu;
+	int i, id;
+
+	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
+		mmio_hw_access_pre(gvt->gt);
+		for (i = 0; i < vgpu_fence_sz(vgpu); i++)
+			intel_vgpu_write_fence(vgpu, i, vgpu_vreg64(vgpu, fence_num_to_offset(i)));
+		mmio_hw_access_post(gvt->gt);
+	}
+}
+
+static int mmio_pm_restore_handler(struct intel_gvt *gvt, u32 offset, void *data)
+{
+	struct intel_vgpu *vgpu = data;
+	struct drm_i915_private *dev_priv = gvt->gt->i915;
+
+	if (gvt->mmio.mmio_attribute[offset >> 2] & F_PM_SAVE)
+		intel_uncore_write(&dev_priv->uncore, _MMIO(offset), vgpu_vreg(vgpu, offset));
+
+	return 0;
+}
+
+void intel_gvt_restore_mmio(struct intel_gvt *gvt)
+{
+	struct intel_vgpu *vgpu;
+	int id;
+
+	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
+		mmio_hw_access_pre(gvt->gt);
+		intel_gvt_for_each_tracked_mmio(gvt, mmio_pm_restore_handler, vgpu);
+		mmio_hw_access_post(gvt->gt);
+	}
+}
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c
new file mode 100644
index 0000000000..68eca023bb
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@@ -0,0 +1,717 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ * Contributors:
+ *    Min he <min.he@intel.com>
+ *
+ */
+
+#include <linux/eventfd.h>
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gvt.h"
+#include "trace.h"
+
+/* common offset among interrupt control registers */
+#define regbase_to_isr(base)	(base)
+#define regbase_to_imr(base)	(base + 0x4)
+#define regbase_to_iir(base)	(base + 0x8)
+#define regbase_to_ier(base)	(base + 0xC)
+
+#define iir_to_regbase(iir)    (iir - 0x8)
+#define ier_to_regbase(ier)    (ier - 0xC)
+
+#define get_event_virt_handler(irq, e)	(irq->events[e].v_handler)
+#define get_irq_info(irq, e)		(irq->events[e].info)
+
+#define irq_to_gvt(irq) \
+	container_of(irq, struct intel_gvt, irq)
+
+static void update_upstream_irq(struct intel_vgpu *vgpu,
+		struct intel_gvt_irq_info *info);
+
+static const char * const irq_name[INTEL_GVT_EVENT_MAX] = {
+	[RCS_MI_USER_INTERRUPT] = "Render CS MI USER INTERRUPT",
+	[RCS_DEBUG] = "Render EU debug from SVG",
+	[RCS_MMIO_SYNC_FLUSH] = "Render MMIO sync flush status",
+	[RCS_CMD_STREAMER_ERR] = "Render CS error interrupt",
+	[RCS_PIPE_CONTROL] = "Render PIPE CONTROL notify",
+	[RCS_WATCHDOG_EXCEEDED] = "Render CS Watchdog counter exceeded",
+	[RCS_PAGE_DIRECTORY_FAULT] = "Render page directory faults",
+	[RCS_AS_CONTEXT_SWITCH] = "Render AS Context Switch Interrupt",
+
+	[VCS_MI_USER_INTERRUPT] = "Video CS MI USER INTERRUPT",
+	[VCS_MMIO_SYNC_FLUSH] = "Video MMIO sync flush status",
+	[VCS_CMD_STREAMER_ERR] = "Video CS error interrupt",
+	[VCS_MI_FLUSH_DW] = "Video MI FLUSH DW notify",
+	[VCS_WATCHDOG_EXCEEDED] = "Video CS Watchdog counter exceeded",
+	[VCS_PAGE_DIRECTORY_FAULT] = "Video page directory faults",
+	[VCS_AS_CONTEXT_SWITCH] = "Video AS Context Switch Interrupt",
+	[VCS2_MI_USER_INTERRUPT] = "VCS2 Video CS MI USER INTERRUPT",
+	[VCS2_MI_FLUSH_DW] = "VCS2 Video MI FLUSH DW notify",
+	[VCS2_AS_CONTEXT_SWITCH] = "VCS2 Context Switch Interrupt",
+
+	[BCS_MI_USER_INTERRUPT] = "Blitter CS MI USER INTERRUPT",
+	[BCS_MMIO_SYNC_FLUSH] = "Billter MMIO sync flush status",
+	[BCS_CMD_STREAMER_ERR] = "Blitter CS error interrupt",
+	[BCS_MI_FLUSH_DW] = "Blitter MI FLUSH DW notify",
+	[BCS_PAGE_DIRECTORY_FAULT] = "Blitter page directory faults",
+	[BCS_AS_CONTEXT_SWITCH] = "Blitter AS Context Switch Interrupt",
+
+	[VECS_MI_FLUSH_DW] = "Video Enhanced Streamer MI FLUSH DW notify",
+	[VECS_AS_CONTEXT_SWITCH] = "VECS Context Switch Interrupt",
+
+	[PIPE_A_FIFO_UNDERRUN] = "Pipe A FIFO underrun",
+	[PIPE_A_CRC_ERR] = "Pipe A CRC error",
+	[PIPE_A_CRC_DONE] = "Pipe A CRC done",
+	[PIPE_A_VSYNC] = "Pipe A vsync",
+	[PIPE_A_LINE_COMPARE] = "Pipe A line compare",
+	[PIPE_A_ODD_FIELD] = "Pipe A odd field",
+	[PIPE_A_EVEN_FIELD] = "Pipe A even field",
+	[PIPE_A_VBLANK] = "Pipe A vblank",
+	[PIPE_B_FIFO_UNDERRUN] = "Pipe B FIFO underrun",
+	[PIPE_B_CRC_ERR] = "Pipe B CRC error",
+	[PIPE_B_CRC_DONE] = "Pipe B CRC done",
+	[PIPE_B_VSYNC] = "Pipe B vsync",
+	[PIPE_B_LINE_COMPARE] = "Pipe B line compare",
+	[PIPE_B_ODD_FIELD] = "Pipe B odd field",
+	[PIPE_B_EVEN_FIELD] = "Pipe B even field",
+	[PIPE_B_VBLANK] = "Pipe B vblank",
+	[PIPE_C_VBLANK] = "Pipe C vblank",
+	[DPST_PHASE_IN] = "DPST phase in event",
+	[DPST_HISTOGRAM] = "DPST histogram event",
+	[GSE] = "GSE",
+	[DP_A_HOTPLUG] = "DP A Hotplug",
+	[AUX_CHANNEL_A] = "AUX Channel A",
+	[PERF_COUNTER] = "Performance counter",
+	[POISON] = "Poison",
+	[GTT_FAULT] = "GTT fault",
+	[PRIMARY_A_FLIP_DONE] = "Primary Plane A flip done",
+	[PRIMARY_B_FLIP_DONE] = "Primary Plane B flip done",
+	[PRIMARY_C_FLIP_DONE] = "Primary Plane C flip done",
+	[SPRITE_A_FLIP_DONE] = "Sprite Plane A flip done",
+	[SPRITE_B_FLIP_DONE] = "Sprite Plane B flip done",
+	[SPRITE_C_FLIP_DONE] = "Sprite Plane C flip done",
+
+	[PCU_THERMAL] = "PCU Thermal Event",
+	[PCU_PCODE2DRIVER_MAILBOX] = "PCU pcode2driver mailbox event",
+
+	[FDI_RX_INTERRUPTS_TRANSCODER_A] = "FDI RX Interrupts Combined A",
+	[AUDIO_CP_CHANGE_TRANSCODER_A] = "Audio CP Change Transcoder A",
+	[AUDIO_CP_REQUEST_TRANSCODER_A] = "Audio CP Request Transcoder A",
+	[FDI_RX_INTERRUPTS_TRANSCODER_B] = "FDI RX Interrupts Combined B",
+	[AUDIO_CP_CHANGE_TRANSCODER_B] = "Audio CP Change Transcoder B",
+	[AUDIO_CP_REQUEST_TRANSCODER_B] = "Audio CP Request Transcoder B",
+	[FDI_RX_INTERRUPTS_TRANSCODER_C] = "FDI RX Interrupts Combined C",
+	[AUDIO_CP_CHANGE_TRANSCODER_C] = "Audio CP Change Transcoder C",
+	[AUDIO_CP_REQUEST_TRANSCODER_C] = "Audio CP Request Transcoder C",
+	[ERR_AND_DBG] = "South Error and Debug Interrupts Combined",
+	[GMBUS] = "Gmbus",
+	[SDVO_B_HOTPLUG] = "SDVO B hotplug",
+	[CRT_HOTPLUG] = "CRT Hotplug",
+	[DP_B_HOTPLUG] = "DisplayPort/HDMI/DVI B Hotplug",
+	[DP_C_HOTPLUG] = "DisplayPort/HDMI/DVI C Hotplug",
+	[DP_D_HOTPLUG] = "DisplayPort/HDMI/DVI D Hotplug",
+	[AUX_CHANNEL_B] = "AUX Channel B",
+	[AUX_CHANNEL_C] = "AUX Channel C",
+	[AUX_CHANNEL_D] = "AUX Channel D",
+	[AUDIO_POWER_STATE_CHANGE_B] = "Audio Power State change Port B",
+	[AUDIO_POWER_STATE_CHANGE_C] = "Audio Power State change Port C",
+	[AUDIO_POWER_STATE_CHANGE_D] = "Audio Power State change Port D",
+
+	[INTEL_GVT_EVENT_RESERVED] = "RESERVED EVENTS!!!",
+};
+
+static inline struct intel_gvt_irq_info *regbase_to_irq_info(
+		struct intel_gvt *gvt,
+		unsigned int reg)
+{
+	struct intel_gvt_irq *irq = &gvt->irq;
+	int i;
+
+	for_each_set_bit(i, irq->irq_info_bitmap, INTEL_GVT_IRQ_INFO_MAX) {
+		if (i915_mmio_reg_offset(irq->info[i]->reg_base) == reg)
+			return irq->info[i];
+	}
+
+	return NULL;
+}
+
+/**
+ * intel_vgpu_reg_imr_handler - Generic IMR register emulation write handler
+ * @vgpu: a vGPU
+ * @reg: register offset written by guest
+ * @p_data: register data written by guest
+ * @bytes: register data length
+ *
+ * This function is used to emulate the generic IMR register bit change
+ * behavior.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_reg_imr_handler(struct intel_vgpu *vgpu,
+	unsigned int reg, void *p_data, unsigned int bytes)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	const struct intel_gvt_irq_ops *ops = gvt->irq.ops;
+	u32 imr = *(u32 *)p_data;
+
+	trace_write_ir(vgpu->id, "IMR", reg, imr, vgpu_vreg(vgpu, reg),
+		       (vgpu_vreg(vgpu, reg) ^ imr));
+
+	vgpu_vreg(vgpu, reg) = imr;
+
+	ops->check_pending_irq(vgpu);
+
+	return 0;
+}
+
+/**
+ * intel_vgpu_reg_master_irq_handler - master IRQ write emulation handler
+ * @vgpu: a vGPU
+ * @reg: register offset written by guest
+ * @p_data: register data written by guest
+ * @bytes: register data length
+ *
+ * This function is used to emulate the master IRQ register on gen8+.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_reg_master_irq_handler(struct intel_vgpu *vgpu,
+	unsigned int reg, void *p_data, unsigned int bytes)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	const struct intel_gvt_irq_ops *ops = gvt->irq.ops;
+	u32 ier = *(u32 *)p_data;
+	u32 virtual_ier = vgpu_vreg(vgpu, reg);
+
+	trace_write_ir(vgpu->id, "MASTER_IRQ", reg, ier, virtual_ier,
+		       (virtual_ier ^ ier));
+
+	/*
+	 * GEN8_MASTER_IRQ is a special irq register,
+	 * only bit 31 is allowed to be modified
+	 * and treated as an IER bit.
+	 */
+	ier &= GEN8_MASTER_IRQ_CONTROL;
+	virtual_ier &= GEN8_MASTER_IRQ_CONTROL;
+	vgpu_vreg(vgpu, reg) &= ~GEN8_MASTER_IRQ_CONTROL;
+	vgpu_vreg(vgpu, reg) |= ier;
+
+	ops->check_pending_irq(vgpu);
+
+	return 0;
+}
+
+/**
+ * intel_vgpu_reg_ier_handler - Generic IER write emulation handler
+ * @vgpu: a vGPU
+ * @reg: register offset written by guest
+ * @p_data: register data written by guest
+ * @bytes: register data length
+ *
+ * This function is used to emulate the generic IER register behavior.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_reg_ier_handler(struct intel_vgpu *vgpu,
+	unsigned int reg, void *p_data, unsigned int bytes)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *i915 = gvt->gt->i915;
+	const struct intel_gvt_irq_ops *ops = gvt->irq.ops;
+	struct intel_gvt_irq_info *info;
+	u32 ier = *(u32 *)p_data;
+
+	trace_write_ir(vgpu->id, "IER", reg, ier, vgpu_vreg(vgpu, reg),
+		       (vgpu_vreg(vgpu, reg) ^ ier));
+
+	vgpu_vreg(vgpu, reg) = ier;
+
+	info = regbase_to_irq_info(gvt, ier_to_regbase(reg));
+	if (drm_WARN_ON(&i915->drm, !info))
+		return -EINVAL;
+
+	if (info->has_upstream_irq)
+		update_upstream_irq(vgpu, info);
+
+	ops->check_pending_irq(vgpu);
+
+	return 0;
+}
+
+/**
+ * intel_vgpu_reg_iir_handler - Generic IIR write emulation handler
+ * @vgpu: a vGPU
+ * @reg: register offset written by guest
+ * @p_data: register data written by guest
+ * @bytes: register data length
+ *
+ * This function is used to emulate the generic IIR register behavior.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_reg_iir_handler(struct intel_vgpu *vgpu, unsigned int reg,
+	void *p_data, unsigned int bytes)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_gvt_irq_info *info = regbase_to_irq_info(vgpu->gvt,
+		iir_to_regbase(reg));
+	u32 iir = *(u32 *)p_data;
+
+	trace_write_ir(vgpu->id, "IIR", reg, iir, vgpu_vreg(vgpu, reg),
+		       (vgpu_vreg(vgpu, reg) ^ iir));
+
+	if (drm_WARN_ON(&i915->drm, !info))
+		return -EINVAL;
+
+	vgpu_vreg(vgpu, reg) &= ~iir;
+
+	if (info->has_upstream_irq)
+		update_upstream_irq(vgpu, info);
+	return 0;
+}
+
+static struct intel_gvt_irq_map gen8_irq_map[] = {
+	{ INTEL_GVT_IRQ_INFO_MASTER, 0, INTEL_GVT_IRQ_INFO_GT0, 0xffff },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 1, INTEL_GVT_IRQ_INFO_GT0, 0xffff0000 },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 2, INTEL_GVT_IRQ_INFO_GT1, 0xffff },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 3, INTEL_GVT_IRQ_INFO_GT1, 0xffff0000 },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 4, INTEL_GVT_IRQ_INFO_GT2, 0xffff },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 6, INTEL_GVT_IRQ_INFO_GT3, 0xffff },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 16, INTEL_GVT_IRQ_INFO_DE_PIPE_A, ~0 },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 17, INTEL_GVT_IRQ_INFO_DE_PIPE_B, ~0 },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 18, INTEL_GVT_IRQ_INFO_DE_PIPE_C, ~0 },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 20, INTEL_GVT_IRQ_INFO_DE_PORT, ~0 },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 22, INTEL_GVT_IRQ_INFO_DE_MISC, ~0 },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 23, INTEL_GVT_IRQ_INFO_PCH, ~0 },
+	{ INTEL_GVT_IRQ_INFO_MASTER, 30, INTEL_GVT_IRQ_INFO_PCU, ~0 },
+	{ -1, -1, ~0 },
+};
+
+static void update_upstream_irq(struct intel_vgpu *vgpu,
+		struct intel_gvt_irq_info *info)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_gvt_irq *irq = &vgpu->gvt->irq;
+	struct intel_gvt_irq_map *map = irq->irq_map;
+	struct intel_gvt_irq_info *up_irq_info = NULL;
+	u32 set_bits = 0;
+	u32 clear_bits = 0;
+	int bit;
+	u32 val = vgpu_vreg(vgpu,
+			regbase_to_iir(i915_mmio_reg_offset(info->reg_base)))
+		& vgpu_vreg(vgpu,
+			regbase_to_ier(i915_mmio_reg_offset(info->reg_base)));
+
+	if (!info->has_upstream_irq)
+		return;
+
+	for (map = irq->irq_map; map->up_irq_bit != -1; map++) {
+		if (info->group != map->down_irq_group)
+			continue;
+
+		if (!up_irq_info)
+			up_irq_info = irq->info[map->up_irq_group];
+		else
+			drm_WARN_ON(&i915->drm, up_irq_info !=
+				    irq->info[map->up_irq_group]);
+
+		bit = map->up_irq_bit;
+
+		if (val & map->down_irq_bitmask)
+			set_bits |= (1 << bit);
+		else
+			clear_bits |= (1 << bit);
+	}
+
+	if (drm_WARN_ON(&i915->drm, !up_irq_info))
+		return;
+
+	if (up_irq_info->group == INTEL_GVT_IRQ_INFO_MASTER) {
+		u32 isr = i915_mmio_reg_offset(up_irq_info->reg_base);
+
+		vgpu_vreg(vgpu, isr) &= ~clear_bits;
+		vgpu_vreg(vgpu, isr) |= set_bits;
+	} else {
+		u32 iir = regbase_to_iir(
+			i915_mmio_reg_offset(up_irq_info->reg_base));
+		u32 imr = regbase_to_imr(
+			i915_mmio_reg_offset(up_irq_info->reg_base));
+
+		vgpu_vreg(vgpu, iir) |= (set_bits & ~vgpu_vreg(vgpu, imr));
+	}
+
+	if (up_irq_info->has_upstream_irq)
+		update_upstream_irq(vgpu, up_irq_info);
+}
+
+static void init_irq_map(struct intel_gvt_irq *irq)
+{
+	struct intel_gvt_irq_map *map;
+	struct intel_gvt_irq_info *up_info, *down_info;
+	int up_bit;
+
+	for (map = irq->irq_map; map->up_irq_bit != -1; map++) {
+		up_info = irq->info[map->up_irq_group];
+		up_bit = map->up_irq_bit;
+		down_info = irq->info[map->down_irq_group];
+
+		set_bit(up_bit, up_info->downstream_irq_bitmap);
+		down_info->has_upstream_irq = true;
+
+		gvt_dbg_irq("[up] grp %d bit %d -> [down] grp %d bitmask %x\n",
+			up_info->group, up_bit,
+			down_info->group, map->down_irq_bitmask);
+	}
+}
+
+/* =======================vEvent injection===================== */
+
+#define MSI_CAP_CONTROL(offset) (offset + 2)
+#define MSI_CAP_ADDRESS(offset) (offset + 4)
+#define MSI_CAP_DATA(offset) (offset + 8)
+#define MSI_CAP_EN 0x1
+
+static int inject_virtual_interrupt(struct intel_vgpu *vgpu)
+{
+	unsigned long offset = vgpu->gvt->device_info.msi_cap_offset;
+	u16 control, data;
+	u32 addr;
+
+	control = *(u16 *)(vgpu_cfg_space(vgpu) + MSI_CAP_CONTROL(offset));
+	addr = *(u32 *)(vgpu_cfg_space(vgpu) + MSI_CAP_ADDRESS(offset));
+	data = *(u16 *)(vgpu_cfg_space(vgpu) + MSI_CAP_DATA(offset));
+
+	/* Do not generate MSI if MSIEN is disabled */
+	if (!(control & MSI_CAP_EN))
+		return 0;
+
+	if (WARN(control & GENMASK(15, 1), "only support one MSI format\n"))
+		return -EINVAL;
+
+	trace_inject_msi(vgpu->id, addr, data);
+
+	/*
+	 * When guest is powered off, msi_trigger is set to NULL, but vgpu's
+	 * config and mmio register isn't restored to default during guest
+	 * poweroff. If this vgpu is still used in next vm, this vgpu's pipe
+	 * may be enabled, then once this vgpu is active, it will get inject
+	 * vblank interrupt request. But msi_trigger is null until msi is
+	 * enabled by guest. so if msi_trigger is null, success is still
+	 * returned and don't inject interrupt into guest.
+	 */
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+		return -ESRCH;
+	if (vgpu->msi_trigger && eventfd_signal(vgpu->msi_trigger, 1) != 1)
+		return -EFAULT;
+	return 0;
+}
+
+static void propagate_event(struct intel_gvt_irq *irq,
+	enum intel_gvt_event_type event, struct intel_vgpu *vgpu)
+{
+	struct intel_gvt_irq_info *info;
+	unsigned int reg_base;
+	int bit;
+
+	info = get_irq_info(irq, event);
+	if (WARN_ON(!info))
+		return;
+
+	reg_base = i915_mmio_reg_offset(info->reg_base);
+	bit = irq->events[event].bit;
+
+	if (!test_bit(bit, (void *)&vgpu_vreg(vgpu,
+					regbase_to_imr(reg_base)))) {
+		trace_propagate_event(vgpu->id, irq_name[event], bit);
+		set_bit(bit, (void *)&vgpu_vreg(vgpu,
+					regbase_to_iir(reg_base)));
+	}
+}
+
+/* =======================vEvent Handlers===================== */
+static void handle_default_event_virt(struct intel_gvt_irq *irq,
+	enum intel_gvt_event_type event, struct intel_vgpu *vgpu)
+{
+	if (!vgpu->irq.irq_warn_once[event]) {
+		gvt_dbg_core("vgpu%d: IRQ receive event %d (%s)\n",
+			vgpu->id, event, irq_name[event]);
+		vgpu->irq.irq_warn_once[event] = true;
+	}
+	propagate_event(irq, event, vgpu);
+}
+
+/* =====================GEN specific logic======================= */
+/* GEN8 interrupt routines. */
+
+#define DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(regname, regbase) \
+static struct intel_gvt_irq_info gen8_##regname##_info = { \
+	.name = #regname"-IRQ", \
+	.reg_base = (regbase), \
+	.bit_to_event = {[0 ... INTEL_GVT_IRQ_BITWIDTH-1] = \
+		INTEL_GVT_EVENT_RESERVED}, \
+}
+
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(gt0, GEN8_GT_ISR(0));
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(gt1, GEN8_GT_ISR(1));
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(gt2, GEN8_GT_ISR(2));
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(gt3, GEN8_GT_ISR(3));
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(de_pipe_a, GEN8_DE_PIPE_ISR(PIPE_A));
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(de_pipe_b, GEN8_DE_PIPE_ISR(PIPE_B));
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(de_pipe_c, GEN8_DE_PIPE_ISR(PIPE_C));
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(de_port, GEN8_DE_PORT_ISR);
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(de_misc, GEN8_DE_MISC_ISR);
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(pcu, GEN8_PCU_ISR);
+DEFINE_GVT_GEN8_INTEL_GVT_IRQ_INFO(master, GEN8_MASTER_IRQ);
+
+static struct intel_gvt_irq_info gvt_base_pch_info = {
+	.name = "PCH-IRQ",
+	.reg_base = SDEISR,
+	.bit_to_event = {[0 ... INTEL_GVT_IRQ_BITWIDTH-1] =
+		INTEL_GVT_EVENT_RESERVED},
+};
+
+static void gen8_check_pending_irq(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt_irq *irq = &vgpu->gvt->irq;
+	int i;
+
+	if (!(vgpu_vreg(vgpu, i915_mmio_reg_offset(GEN8_MASTER_IRQ)) &
+				GEN8_MASTER_IRQ_CONTROL))
+		return;
+
+	for_each_set_bit(i, irq->irq_info_bitmap, INTEL_GVT_IRQ_INFO_MAX) {
+		struct intel_gvt_irq_info *info = irq->info[i];
+		u32 reg_base;
+
+		if (!info->has_upstream_irq)
+			continue;
+
+		reg_base = i915_mmio_reg_offset(info->reg_base);
+		if ((vgpu_vreg(vgpu, regbase_to_iir(reg_base))
+				& vgpu_vreg(vgpu, regbase_to_ier(reg_base))))
+			update_upstream_irq(vgpu, info);
+	}
+
+	if (vgpu_vreg(vgpu, i915_mmio_reg_offset(GEN8_MASTER_IRQ))
+			& ~GEN8_MASTER_IRQ_CONTROL)
+		inject_virtual_interrupt(vgpu);
+}
+
+static void gen8_init_irq(
+		struct intel_gvt_irq *irq)
+{
+	struct intel_gvt *gvt = irq_to_gvt(irq);
+
+#define SET_BIT_INFO(s, b, e, i)		\
+	do {					\
+		s->events[e].bit = b;		\
+		s->events[e].info = s->info[i];	\
+		s->info[i]->bit_to_event[b] = e;\
+	} while (0)
+
+#define SET_IRQ_GROUP(s, g, i) \
+	do { \
+		s->info[g] = i; \
+		(i)->group = g; \
+		set_bit(g, s->irq_info_bitmap); \
+	} while (0)
+
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_MASTER, &gen8_master_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_GT0, &gen8_gt0_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_GT1, &gen8_gt1_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_GT2, &gen8_gt2_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_GT3, &gen8_gt3_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_DE_PIPE_A, &gen8_de_pipe_a_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_DE_PIPE_B, &gen8_de_pipe_b_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_DE_PIPE_C, &gen8_de_pipe_c_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_DE_PORT, &gen8_de_port_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_DE_MISC, &gen8_de_misc_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_PCU, &gen8_pcu_info);
+	SET_IRQ_GROUP(irq, INTEL_GVT_IRQ_INFO_PCH, &gvt_base_pch_info);
+
+	/* GEN8 level 2 interrupts. */
+
+	/* GEN8 interrupt GT0 events */
+	SET_BIT_INFO(irq, 0, RCS_MI_USER_INTERRUPT, INTEL_GVT_IRQ_INFO_GT0);
+	SET_BIT_INFO(irq, 4, RCS_PIPE_CONTROL, INTEL_GVT_IRQ_INFO_GT0);
+	SET_BIT_INFO(irq, 8, RCS_AS_CONTEXT_SWITCH, INTEL_GVT_IRQ_INFO_GT0);
+
+	SET_BIT_INFO(irq, 16, BCS_MI_USER_INTERRUPT, INTEL_GVT_IRQ_INFO_GT0);
+	SET_BIT_INFO(irq, 20, BCS_MI_FLUSH_DW, INTEL_GVT_IRQ_INFO_GT0);
+	SET_BIT_INFO(irq, 24, BCS_AS_CONTEXT_SWITCH, INTEL_GVT_IRQ_INFO_GT0);
+
+	/* GEN8 interrupt GT1 events */
+	SET_BIT_INFO(irq, 0, VCS_MI_USER_INTERRUPT, INTEL_GVT_IRQ_INFO_GT1);
+	SET_BIT_INFO(irq, 4, VCS_MI_FLUSH_DW, INTEL_GVT_IRQ_INFO_GT1);
+	SET_BIT_INFO(irq, 8, VCS_AS_CONTEXT_SWITCH, INTEL_GVT_IRQ_INFO_GT1);
+
+	if (HAS_ENGINE(gvt->gt, VCS1)) {
+		SET_BIT_INFO(irq, 16, VCS2_MI_USER_INTERRUPT,
+			INTEL_GVT_IRQ_INFO_GT1);
+		SET_BIT_INFO(irq, 20, VCS2_MI_FLUSH_DW,
+			INTEL_GVT_IRQ_INFO_GT1);
+		SET_BIT_INFO(irq, 24, VCS2_AS_CONTEXT_SWITCH,
+			INTEL_GVT_IRQ_INFO_GT1);
+	}
+
+	/* GEN8 interrupt GT3 events */
+	SET_BIT_INFO(irq, 0, VECS_MI_USER_INTERRUPT, INTEL_GVT_IRQ_INFO_GT3);
+	SET_BIT_INFO(irq, 4, VECS_MI_FLUSH_DW, INTEL_GVT_IRQ_INFO_GT3);
+	SET_BIT_INFO(irq, 8, VECS_AS_CONTEXT_SWITCH, INTEL_GVT_IRQ_INFO_GT3);
+
+	SET_BIT_INFO(irq, 0, PIPE_A_VBLANK, INTEL_GVT_IRQ_INFO_DE_PIPE_A);
+	SET_BIT_INFO(irq, 0, PIPE_B_VBLANK, INTEL_GVT_IRQ_INFO_DE_PIPE_B);
+	SET_BIT_INFO(irq, 0, PIPE_C_VBLANK, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
+
+	/* GEN8 interrupt DE PORT events */
+	SET_BIT_INFO(irq, 0, AUX_CHANNEL_A, INTEL_GVT_IRQ_INFO_DE_PORT);
+	SET_BIT_INFO(irq, 3, DP_A_HOTPLUG, INTEL_GVT_IRQ_INFO_DE_PORT);
+
+	/* GEN8 interrupt DE MISC events */
+	SET_BIT_INFO(irq, 0, GSE, INTEL_GVT_IRQ_INFO_DE_MISC);
+
+	/* PCH events */
+	SET_BIT_INFO(irq, 17, GMBUS, INTEL_GVT_IRQ_INFO_PCH);
+	SET_BIT_INFO(irq, 19, CRT_HOTPLUG, INTEL_GVT_IRQ_INFO_PCH);
+	SET_BIT_INFO(irq, 21, DP_B_HOTPLUG, INTEL_GVT_IRQ_INFO_PCH);
+	SET_BIT_INFO(irq, 22, DP_C_HOTPLUG, INTEL_GVT_IRQ_INFO_PCH);
+	SET_BIT_INFO(irq, 23, DP_D_HOTPLUG, INTEL_GVT_IRQ_INFO_PCH);
+
+	if (IS_BROADWELL(gvt->gt->i915)) {
+		SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_PCH);
+		SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_PCH);
+		SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_PCH);
+
+		SET_BIT_INFO(irq, 4, PRIMARY_A_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_A);
+		SET_BIT_INFO(irq, 5, SPRITE_A_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_A);
+
+		SET_BIT_INFO(irq, 4, PRIMARY_B_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_B);
+		SET_BIT_INFO(irq, 5, SPRITE_B_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_B);
+
+		SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
+		SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
+	} else if (GRAPHICS_VER(gvt->gt->i915) >= 9) {
+		SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT);
+		SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT);
+		SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT);
+
+		SET_BIT_INFO(irq, 3, PRIMARY_A_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_A);
+		SET_BIT_INFO(irq, 3, PRIMARY_B_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_B);
+		SET_BIT_INFO(irq, 3, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
+
+		SET_BIT_INFO(irq, 4, SPRITE_A_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_A);
+		SET_BIT_INFO(irq, 4, SPRITE_B_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_B);
+		SET_BIT_INFO(irq, 4, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
+	}
+
+	/* GEN8 interrupt PCU events */
+	SET_BIT_INFO(irq, 24, PCU_THERMAL, INTEL_GVT_IRQ_INFO_PCU);
+	SET_BIT_INFO(irq, 25, PCU_PCODE2DRIVER_MAILBOX, INTEL_GVT_IRQ_INFO_PCU);
+}
+
+static const struct intel_gvt_irq_ops gen8_irq_ops = {
+	.init_irq = gen8_init_irq,
+	.check_pending_irq = gen8_check_pending_irq,
+};
+
+/**
+ * intel_vgpu_trigger_virtual_event - Trigger a virtual event for a vGPU
+ * @vgpu: a vGPU
+ * @event: interrupt event
+ *
+ * This function is used to trigger a virtual interrupt event for vGPU.
+ * The caller provides the event to be triggered, the framework itself
+ * will emulate the IRQ register bit change.
+ *
+ */
+void intel_vgpu_trigger_virtual_event(struct intel_vgpu *vgpu,
+	enum intel_gvt_event_type event)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gvt_irq *irq = &gvt->irq;
+	gvt_event_virt_handler_t handler;
+	const struct intel_gvt_irq_ops *ops = gvt->irq.ops;
+
+	handler = get_event_virt_handler(irq, event);
+	drm_WARN_ON(&i915->drm, !handler);
+
+	handler(irq, event, vgpu);
+
+	ops->check_pending_irq(vgpu);
+}
+
+static void init_events(
+	struct intel_gvt_irq *irq)
+{
+	int i;
+
+	for (i = 0; i < INTEL_GVT_EVENT_MAX; i++) {
+		irq->events[i].info = NULL;
+		irq->events[i].v_handler = handle_default_event_virt;
+	}
+}
+
+/**
+ * intel_gvt_init_irq - initialize GVT-g IRQ emulation subsystem
+ * @gvt: a GVT device
+ *
+ * This function is called at driver loading stage, to initialize the GVT-g IRQ
+ * emulation subsystem.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_gvt_init_irq(struct intel_gvt *gvt)
+{
+	struct intel_gvt_irq *irq = &gvt->irq;
+
+	gvt_dbg_core("init irq framework\n");
+
+	irq->ops = &gen8_irq_ops;
+	irq->irq_map = gen8_irq_map;
+
+	/* common event initialization */
+	init_events(irq);
+
+	/* gen specific initialization */
+	irq->ops->init_irq(irq);
+
+	init_irq_map(irq);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.h b/drivers/gpu/drm/i915/gvt/interrupt.h
new file mode 100644
index 0000000000..b62f04ab47
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/interrupt.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ * Contributors:
+ *    Min he <min.he@intel.com>
+ *
+ */
+
+#ifndef _GVT_INTERRUPT_H_
+#define _GVT_INTERRUPT_H_
+
+#include <linux/hrtimer.h>
+#include <linux/kernel.h>
+
+#include "i915_reg_defs.h"
+
+enum intel_gvt_event_type {
+	RCS_MI_USER_INTERRUPT = 0,
+	RCS_DEBUG,
+	RCS_MMIO_SYNC_FLUSH,
+	RCS_CMD_STREAMER_ERR,
+	RCS_PIPE_CONTROL,
+	RCS_L3_PARITY_ERR,
+	RCS_WATCHDOG_EXCEEDED,
+	RCS_PAGE_DIRECTORY_FAULT,
+	RCS_AS_CONTEXT_SWITCH,
+	RCS_MONITOR_BUFF_HALF_FULL,
+
+	VCS_MI_USER_INTERRUPT,
+	VCS_MMIO_SYNC_FLUSH,
+	VCS_CMD_STREAMER_ERR,
+	VCS_MI_FLUSH_DW,
+	VCS_WATCHDOG_EXCEEDED,
+	VCS_PAGE_DIRECTORY_FAULT,
+	VCS_AS_CONTEXT_SWITCH,
+
+	VCS2_MI_USER_INTERRUPT,
+	VCS2_MI_FLUSH_DW,
+	VCS2_AS_CONTEXT_SWITCH,
+
+	BCS_MI_USER_INTERRUPT,
+	BCS_MMIO_SYNC_FLUSH,
+	BCS_CMD_STREAMER_ERR,
+	BCS_MI_FLUSH_DW,
+	BCS_PAGE_DIRECTORY_FAULT,
+	BCS_AS_CONTEXT_SWITCH,
+
+	VECS_MI_USER_INTERRUPT,
+	VECS_MI_FLUSH_DW,
+	VECS_AS_CONTEXT_SWITCH,
+
+	PIPE_A_FIFO_UNDERRUN,
+	PIPE_B_FIFO_UNDERRUN,
+	PIPE_A_CRC_ERR,
+	PIPE_B_CRC_ERR,
+	PIPE_A_CRC_DONE,
+	PIPE_B_CRC_DONE,
+	PIPE_A_ODD_FIELD,
+	PIPE_B_ODD_FIELD,
+	PIPE_A_EVEN_FIELD,
+	PIPE_B_EVEN_FIELD,
+	PIPE_A_LINE_COMPARE,
+	PIPE_B_LINE_COMPARE,
+	PIPE_C_LINE_COMPARE,
+	PIPE_A_VBLANK,
+	PIPE_B_VBLANK,
+	PIPE_C_VBLANK,
+	PIPE_A_VSYNC,
+	PIPE_B_VSYNC,
+	PIPE_C_VSYNC,
+	PRIMARY_A_FLIP_DONE,
+	PRIMARY_B_FLIP_DONE,
+	PRIMARY_C_FLIP_DONE,
+	SPRITE_A_FLIP_DONE,
+	SPRITE_B_FLIP_DONE,
+	SPRITE_C_FLIP_DONE,
+
+	PCU_THERMAL,
+	PCU_PCODE2DRIVER_MAILBOX,
+
+	DPST_PHASE_IN,
+	DPST_HISTOGRAM,
+	GSE,
+	DP_A_HOTPLUG,
+	AUX_CHANNEL_A,
+	PERF_COUNTER,
+	POISON,
+	GTT_FAULT,
+	ERROR_INTERRUPT_COMBINED,
+
+	FDI_RX_INTERRUPTS_TRANSCODER_A,
+	AUDIO_CP_CHANGE_TRANSCODER_A,
+	AUDIO_CP_REQUEST_TRANSCODER_A,
+	FDI_RX_INTERRUPTS_TRANSCODER_B,
+	AUDIO_CP_CHANGE_TRANSCODER_B,
+	AUDIO_CP_REQUEST_TRANSCODER_B,
+	FDI_RX_INTERRUPTS_TRANSCODER_C,
+	AUDIO_CP_CHANGE_TRANSCODER_C,
+	AUDIO_CP_REQUEST_TRANSCODER_C,
+	ERR_AND_DBG,
+	GMBUS,
+	SDVO_B_HOTPLUG,
+	CRT_HOTPLUG,
+	DP_B_HOTPLUG,
+	DP_C_HOTPLUG,
+	DP_D_HOTPLUG,
+	AUX_CHANNEL_B,
+	AUX_CHANNEL_C,
+	AUX_CHANNEL_D,
+	AUDIO_POWER_STATE_CHANGE_B,
+	AUDIO_POWER_STATE_CHANGE_C,
+	AUDIO_POWER_STATE_CHANGE_D,
+
+	INTEL_GVT_EVENT_RESERVED,
+	INTEL_GVT_EVENT_MAX,
+};
+
+struct intel_gvt_irq;
+struct intel_gvt;
+struct intel_vgpu;
+
+typedef void (*gvt_event_virt_handler_t)(struct intel_gvt_irq *irq,
+	enum intel_gvt_event_type event, struct intel_vgpu *vgpu);
+
+struct intel_gvt_irq_ops {
+	void (*init_irq)(struct intel_gvt_irq *irq);
+	void (*check_pending_irq)(struct intel_vgpu *vgpu);
+};
+
+/* the list of physical interrupt control register groups */
+enum intel_gvt_irq_type {
+	INTEL_GVT_IRQ_INFO_GT,
+	INTEL_GVT_IRQ_INFO_DPY,
+	INTEL_GVT_IRQ_INFO_PCH,
+	INTEL_GVT_IRQ_INFO_PM,
+
+	INTEL_GVT_IRQ_INFO_MASTER,
+	INTEL_GVT_IRQ_INFO_GT0,
+	INTEL_GVT_IRQ_INFO_GT1,
+	INTEL_GVT_IRQ_INFO_GT2,
+	INTEL_GVT_IRQ_INFO_GT3,
+	INTEL_GVT_IRQ_INFO_DE_PIPE_A,
+	INTEL_GVT_IRQ_INFO_DE_PIPE_B,
+	INTEL_GVT_IRQ_INFO_DE_PIPE_C,
+	INTEL_GVT_IRQ_INFO_DE_PORT,
+	INTEL_GVT_IRQ_INFO_DE_MISC,
+	INTEL_GVT_IRQ_INFO_AUD,
+	INTEL_GVT_IRQ_INFO_PCU,
+
+	INTEL_GVT_IRQ_INFO_MAX,
+};
+
+#define INTEL_GVT_IRQ_BITWIDTH	32
+
+/* device specific interrupt bit definitions */
+struct intel_gvt_irq_info {
+	char *name;
+	i915_reg_t reg_base;
+	enum intel_gvt_event_type bit_to_event[INTEL_GVT_IRQ_BITWIDTH];
+	unsigned long warned;
+	int group;
+	DECLARE_BITMAP(downstream_irq_bitmap, INTEL_GVT_IRQ_BITWIDTH);
+	bool has_upstream_irq;
+};
+
+/* per-event information */
+struct intel_gvt_event_info {
+	int bit;				/* map to register bit */
+	int policy;				/* forwarding policy */
+	struct intel_gvt_irq_info *info;	/* register info */
+	gvt_event_virt_handler_t v_handler;	/* for v_event */
+};
+
+struct intel_gvt_irq_map {
+	int up_irq_group;
+	int up_irq_bit;
+	int down_irq_group;
+	u32 down_irq_bitmask;
+};
+
+/* structure containing device specific IRQ state */
+struct intel_gvt_irq {
+	const struct intel_gvt_irq_ops *ops;
+	struct intel_gvt_irq_info *info[INTEL_GVT_IRQ_INFO_MAX];
+	DECLARE_BITMAP(irq_info_bitmap, INTEL_GVT_IRQ_INFO_MAX);
+	struct intel_gvt_event_info events[INTEL_GVT_EVENT_MAX];
+	DECLARE_BITMAP(pending_events, INTEL_GVT_EVENT_MAX);
+	struct intel_gvt_irq_map *irq_map;
+};
+
+int intel_gvt_init_irq(struct intel_gvt *gvt);
+
+void intel_vgpu_trigger_virtual_event(struct intel_vgpu *vgpu,
+	enum intel_gvt_event_type event);
+
+int intel_vgpu_reg_iir_handler(struct intel_vgpu *vgpu, unsigned int reg,
+	void *p_data, unsigned int bytes);
+int intel_vgpu_reg_ier_handler(struct intel_vgpu *vgpu,
+	unsigned int reg, void *p_data, unsigned int bytes);
+int intel_vgpu_reg_master_irq_handler(struct intel_vgpu *vgpu,
+	unsigned int reg, void *p_data, unsigned int bytes);
+int intel_vgpu_reg_imr_handler(struct intel_vgpu *vgpu,
+	unsigned int reg, void *p_data, unsigned int bytes);
+
+int gvt_ring_id_to_pipe_control_notify_event(int ring_id);
+int gvt_ring_id_to_mi_flush_dw_event(int ring_id);
+int gvt_ring_id_to_mi_user_interrupt_event(int ring_id);
+
+#endif /* _GVT_INTERRUPT_H_ */
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
new file mode 100644
index 0000000000..42ce20e72d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -0,0 +1,1989 @@
+/*
+ * KVMGT - the implementation of Intel mediated pass-through framework for KVM
+ *
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Jike Song <jike.song@intel.com>
+ *    Xiaoguang Chen <xiaoguang.chen@intel.com>
+ *    Eddie Dong <eddie.dong@intel.com>
+ *
+ * Contributors:
+ *    Niu Bing <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/kthread.h>
+#include <linux/sched/mm.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/eventfd.h>
+#include <linux/mdev.h>
+#include <linux/debugfs.h>
+
+#include <linux/nospec.h>
+
+#include <drm/drm_edid.h>
+
+#include "i915_drv.h"
+#include "intel_gvt.h"
+#include "gvt.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+MODULE_IMPORT_NS(I915_GVT);
+
+/* helper macros copied from vfio-pci */
+#define VFIO_PCI_OFFSET_SHIFT   40
+#define VFIO_PCI_OFFSET_TO_INDEX(off)   (off >> VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
+#define VFIO_PCI_OFFSET_MASK    (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
+
+#define EDID_BLOB_OFFSET (PAGE_SIZE/2)
+
+#define OPREGION_SIGNATURE "IntelGraphicsMem"
+
+struct vfio_region;
+struct intel_vgpu_regops {
+	size_t (*rw)(struct intel_vgpu *vgpu, char *buf,
+			size_t count, loff_t *ppos, bool iswrite);
+	void (*release)(struct intel_vgpu *vgpu,
+			struct vfio_region *region);
+};
+
+struct vfio_region {
+	u32				type;
+	u32				subtype;
+	size_t				size;
+	u32				flags;
+	const struct intel_vgpu_regops	*ops;
+	void				*data;
+};
+
+struct vfio_edid_region {
+	struct vfio_region_gfx_edid vfio_edid_regs;
+	void *edid_blob;
+};
+
+struct kvmgt_pgfn {
+	gfn_t gfn;
+	struct hlist_node hnode;
+};
+
+struct gvt_dma {
+	struct intel_vgpu *vgpu;
+	struct rb_node gfn_node;
+	struct rb_node dma_addr_node;
+	gfn_t gfn;
+	dma_addr_t dma_addr;
+	unsigned long size;
+	struct kref ref;
+};
+
+#define vfio_dev_to_vgpu(vfio_dev) \
+	container_of((vfio_dev), struct intel_vgpu, vfio_device)
+
+static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
+				   struct kvm_page_track_notifier_node *node);
+static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
+					   struct kvm_page_track_notifier_node *node);
+
+static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
+{
+	struct intel_vgpu_type *type =
+		container_of(mtype, struct intel_vgpu_type, type);
+
+	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
+		       "fence: %d\nresolution: %s\n"
+		       "weight: %d\n",
+		       BYTES_TO_MB(type->conf->low_mm),
+		       BYTES_TO_MB(type->conf->high_mm),
+		       type->conf->fence, vgpu_edid_str(type->conf->edid),
+		       type->conf->weight);
+}
+
+static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		unsigned long size)
+{
+	vfio_unpin_pages(&vgpu->vfio_device, gfn << PAGE_SHIFT,
+			 DIV_ROUND_UP(size, PAGE_SIZE));
+}
+
+/* Pin a normal or compound guest page for dma. */
+static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		unsigned long size, struct page **page)
+{
+	int total_pages = DIV_ROUND_UP(size, PAGE_SIZE);
+	struct page *base_page = NULL;
+	int npage;
+	int ret;
+
+	/*
+	 * We pin the pages one-by-one to avoid allocating a big arrary
+	 * on stack to hold pfns.
+	 */
+	for (npage = 0; npage < total_pages; npage++) {
+		dma_addr_t cur_iova = (gfn + npage) << PAGE_SHIFT;
+		struct page *cur_page;
+
+		ret = vfio_pin_pages(&vgpu->vfio_device, cur_iova, 1,
+				     IOMMU_READ | IOMMU_WRITE, &cur_page);
+		if (ret != 1) {
+			gvt_vgpu_err("vfio_pin_pages failed for iova %pad, ret %d\n",
+				     &cur_iova, ret);
+			goto err;
+		}
+
+		if (npage == 0)
+			base_page = cur_page;
+		else if (page_to_pfn(base_page) + npage != page_to_pfn(cur_page)) {
+			ret = -EINVAL;
+			npage++;
+			goto err;
+		}
+	}
+
+	*page = base_page;
+	return 0;
+err:
+	if (npage)
+		gvt_unpin_guest_page(vgpu, gfn, npage * PAGE_SIZE);
+	return ret;
+}
+
+static int gvt_dma_map_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		dma_addr_t *dma_addr, unsigned long size)
+{
+	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
+	struct page *page = NULL;
+	int ret;
+
+	ret = gvt_pin_guest_page(vgpu, gfn, size, &page);
+	if (ret)
+		return ret;
+
+	/* Setup DMA mapping. */
+	*dma_addr = dma_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(dev, *dma_addr)) {
+		gvt_vgpu_err("DMA mapping failed for pfn 0x%lx, ret %d\n",
+			     page_to_pfn(page), ret);
+		gvt_unpin_guest_page(vgpu, gfn, size);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void gvt_dma_unmap_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		dma_addr_t dma_addr, unsigned long size)
+{
+	struct device *dev = vgpu->gvt->gt->i915->drm.dev;
+
+	dma_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL);
+	gvt_unpin_guest_page(vgpu, gfn, size);
+}
+
+static struct gvt_dma *__gvt_cache_find_dma_addr(struct intel_vgpu *vgpu,
+		dma_addr_t dma_addr)
+{
+	struct rb_node *node = vgpu->dma_addr_cache.rb_node;
+	struct gvt_dma *itr;
+
+	while (node) {
+		itr = rb_entry(node, struct gvt_dma, dma_addr_node);
+
+		if (dma_addr < itr->dma_addr)
+			node = node->rb_left;
+		else if (dma_addr > itr->dma_addr)
+			node = node->rb_right;
+		else
+			return itr;
+	}
+	return NULL;
+}
+
+static struct gvt_dma *__gvt_cache_find_gfn(struct intel_vgpu *vgpu, gfn_t gfn)
+{
+	struct rb_node *node = vgpu->gfn_cache.rb_node;
+	struct gvt_dma *itr;
+
+	while (node) {
+		itr = rb_entry(node, struct gvt_dma, gfn_node);
+
+		if (gfn < itr->gfn)
+			node = node->rb_left;
+		else if (gfn > itr->gfn)
+			node = node->rb_right;
+		else
+			return itr;
+	}
+	return NULL;
+}
+
+static int __gvt_cache_add(struct intel_vgpu *vgpu, gfn_t gfn,
+		dma_addr_t dma_addr, unsigned long size)
+{
+	struct gvt_dma *new, *itr;
+	struct rb_node **link, *parent = NULL;
+
+	new = kzalloc(sizeof(struct gvt_dma), GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
+
+	new->vgpu = vgpu;
+	new->gfn = gfn;
+	new->dma_addr = dma_addr;
+	new->size = size;
+	kref_init(&new->ref);
+
+	/* gfn_cache maps gfn to struct gvt_dma. */
+	link = &vgpu->gfn_cache.rb_node;
+	while (*link) {
+		parent = *link;
+		itr = rb_entry(parent, struct gvt_dma, gfn_node);
+
+		if (gfn < itr->gfn)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+	rb_link_node(&new->gfn_node, parent, link);
+	rb_insert_color(&new->gfn_node, &vgpu->gfn_cache);
+
+	/* dma_addr_cache maps dma addr to struct gvt_dma. */
+	parent = NULL;
+	link = &vgpu->dma_addr_cache.rb_node;
+	while (*link) {
+		parent = *link;
+		itr = rb_entry(parent, struct gvt_dma, dma_addr_node);
+
+		if (dma_addr < itr->dma_addr)
+			link = &parent->rb_left;
+		else
+			link = &parent->rb_right;
+	}
+	rb_link_node(&new->dma_addr_node, parent, link);
+	rb_insert_color(&new->dma_addr_node, &vgpu->dma_addr_cache);
+
+	vgpu->nr_cache_entries++;
+	return 0;
+}
+
+static void __gvt_cache_remove_entry(struct intel_vgpu *vgpu,
+				struct gvt_dma *entry)
+{
+	rb_erase(&entry->gfn_node, &vgpu->gfn_cache);
+	rb_erase(&entry->dma_addr_node, &vgpu->dma_addr_cache);
+	kfree(entry);
+	vgpu->nr_cache_entries--;
+}
+
+static void gvt_cache_destroy(struct intel_vgpu *vgpu)
+{
+	struct gvt_dma *dma;
+	struct rb_node *node = NULL;
+
+	for (;;) {
+		mutex_lock(&vgpu->cache_lock);
+		node = rb_first(&vgpu->gfn_cache);
+		if (!node) {
+			mutex_unlock(&vgpu->cache_lock);
+			break;
+		}
+		dma = rb_entry(node, struct gvt_dma, gfn_node);
+		gvt_dma_unmap_page(vgpu, dma->gfn, dma->dma_addr, dma->size);
+		__gvt_cache_remove_entry(vgpu, dma);
+		mutex_unlock(&vgpu->cache_lock);
+	}
+}
+
+static void gvt_cache_init(struct intel_vgpu *vgpu)
+{
+	vgpu->gfn_cache = RB_ROOT;
+	vgpu->dma_addr_cache = RB_ROOT;
+	vgpu->nr_cache_entries = 0;
+	mutex_init(&vgpu->cache_lock);
+}
+
+static void kvmgt_protect_table_init(struct intel_vgpu *info)
+{
+	hash_init(info->ptable);
+}
+
+static void kvmgt_protect_table_destroy(struct intel_vgpu *info)
+{
+	struct kvmgt_pgfn *p;
+	struct hlist_node *tmp;
+	int i;
+
+	hash_for_each_safe(info->ptable, i, tmp, p, hnode) {
+		hash_del(&p->hnode);
+		kfree(p);
+	}
+}
+
+static struct kvmgt_pgfn *
+__kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn)
+{
+	struct kvmgt_pgfn *p, *res = NULL;
+
+	lockdep_assert_held(&info->vgpu_lock);
+
+	hash_for_each_possible(info->ptable, p, hnode, gfn) {
+		if (gfn == p->gfn) {
+			res = p;
+			break;
+		}
+	}
+
+	return res;
+}
+
+static bool kvmgt_gfn_is_write_protected(struct intel_vgpu *info, gfn_t gfn)
+{
+	struct kvmgt_pgfn *p;
+
+	p = __kvmgt_protect_table_find(info, gfn);
+	return !!p;
+}
+
+static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn)
+{
+	struct kvmgt_pgfn *p;
+
+	if (kvmgt_gfn_is_write_protected(info, gfn))
+		return;
+
+	p = kzalloc(sizeof(struct kvmgt_pgfn), GFP_ATOMIC);
+	if (WARN(!p, "gfn: 0x%llx\n", gfn))
+		return;
+
+	p->gfn = gfn;
+	hash_add(info->ptable, &p->hnode, gfn);
+}
+
+static void kvmgt_protect_table_del(struct intel_vgpu *info, gfn_t gfn)
+{
+	struct kvmgt_pgfn *p;
+
+	p = __kvmgt_protect_table_find(info, gfn);
+	if (p) {
+		hash_del(&p->hnode);
+		kfree(p);
+	}
+}
+
+static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf,
+		size_t count, loff_t *ppos, bool iswrite)
+{
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
+			VFIO_PCI_NUM_REGIONS;
+	void *base = vgpu->region[i].data;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+
+
+	if (pos >= vgpu->region[i].size || iswrite) {
+		gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n");
+		return -EINVAL;
+	}
+	count = min(count, (size_t)(vgpu->region[i].size - pos));
+	memcpy(buf, base + pos, count);
+
+	return count;
+}
+
+static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu,
+		struct vfio_region *region)
+{
+}
+
+static const struct intel_vgpu_regops intel_vgpu_regops_opregion = {
+	.rw = intel_vgpu_reg_rw_opregion,
+	.release = intel_vgpu_reg_release_opregion,
+};
+
+static int handle_edid_regs(struct intel_vgpu *vgpu,
+			struct vfio_edid_region *region, char *buf,
+			size_t count, u16 offset, bool is_write)
+{
+	struct vfio_region_gfx_edid *regs = &region->vfio_edid_regs;
+	unsigned int data;
+
+	if (offset + count > sizeof(*regs))
+		return -EINVAL;
+
+	if (count != 4)
+		return -EINVAL;
+
+	if (is_write) {
+		data = *((unsigned int *)buf);
+		switch (offset) {
+		case offsetof(struct vfio_region_gfx_edid, link_state):
+			if (data == VFIO_DEVICE_GFX_LINK_STATE_UP) {
+				if (!drm_edid_block_valid(
+					(u8 *)region->edid_blob,
+					0,
+					true,
+					NULL)) {
+					gvt_vgpu_err("invalid EDID blob\n");
+					return -EINVAL;
+				}
+				intel_vgpu_emulate_hotplug(vgpu, true);
+			} else if (data == VFIO_DEVICE_GFX_LINK_STATE_DOWN)
+				intel_vgpu_emulate_hotplug(vgpu, false);
+			else {
+				gvt_vgpu_err("invalid EDID link state %d\n",
+					regs->link_state);
+				return -EINVAL;
+			}
+			regs->link_state = data;
+			break;
+		case offsetof(struct vfio_region_gfx_edid, edid_size):
+			if (data > regs->edid_max_size) {
+				gvt_vgpu_err("EDID size is bigger than %d!\n",
+					regs->edid_max_size);
+				return -EINVAL;
+			}
+			regs->edid_size = data;
+			break;
+		default:
+			/* read-only regs */
+			gvt_vgpu_err("write read-only EDID region at offset %d\n",
+				offset);
+			return -EPERM;
+		}
+	} else {
+		memcpy(buf, (char *)regs + offset, count);
+	}
+
+	return count;
+}
+
+static int handle_edid_blob(struct vfio_edid_region *region, char *buf,
+			size_t count, u16 offset, bool is_write)
+{
+	if (offset + count > region->vfio_edid_regs.edid_size)
+		return -EINVAL;
+
+	if (is_write)
+		memcpy(region->edid_blob + offset, buf, count);
+	else
+		memcpy(buf, region->edid_blob + offset, count);
+
+	return count;
+}
+
+static size_t intel_vgpu_reg_rw_edid(struct intel_vgpu *vgpu, char *buf,
+		size_t count, loff_t *ppos, bool iswrite)
+{
+	int ret;
+	unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) -
+			VFIO_PCI_NUM_REGIONS;
+	struct vfio_edid_region *region = vgpu->region[i].data;
+	loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+
+	if (pos < region->vfio_edid_regs.edid_offset) {
+		ret = handle_edid_regs(vgpu, region, buf, count, pos, iswrite);
+	} else {
+		pos -= EDID_BLOB_OFFSET;
+		ret = handle_edid_blob(region, buf, count, pos, iswrite);
+	}
+
+	if (ret < 0)
+		gvt_vgpu_err("failed to access EDID region\n");
+
+	return ret;
+}
+
+static void intel_vgpu_reg_release_edid(struct intel_vgpu *vgpu,
+					struct vfio_region *region)
+{
+	kfree(region->data);
+}
+
+static const struct intel_vgpu_regops intel_vgpu_regops_edid = {
+	.rw = intel_vgpu_reg_rw_edid,
+	.release = intel_vgpu_reg_release_edid,
+};
+
+static int intel_vgpu_register_reg(struct intel_vgpu *vgpu,
+		unsigned int type, unsigned int subtype,
+		const struct intel_vgpu_regops *ops,
+		size_t size, u32 flags, void *data)
+{
+	struct vfio_region *region;
+
+	region = krealloc(vgpu->region,
+			(vgpu->num_regions + 1) * sizeof(*region),
+			GFP_KERNEL);
+	if (!region)
+		return -ENOMEM;
+
+	vgpu->region = region;
+	vgpu->region[vgpu->num_regions].type = type;
+	vgpu->region[vgpu->num_regions].subtype = subtype;
+	vgpu->region[vgpu->num_regions].ops = ops;
+	vgpu->region[vgpu->num_regions].size = size;
+	vgpu->region[vgpu->num_regions].flags = flags;
+	vgpu->region[vgpu->num_regions].data = data;
+	vgpu->num_regions++;
+	return 0;
+}
+
+int intel_gvt_set_opregion(struct intel_vgpu *vgpu)
+{
+	void *base;
+	int ret;
+
+	/* Each vgpu has its own opregion, although VFIO would create another
+	 * one later. This one is used to expose opregion to VFIO. And the
+	 * other one created by VFIO later, is used by guest actually.
+	 */
+	base = vgpu_opregion(vgpu)->va;
+	if (!base)
+		return -ENOMEM;
+
+	if (memcmp(base, OPREGION_SIGNATURE, 16)) {
+		memunmap(base);
+		return -EINVAL;
+	}
+
+	ret = intel_vgpu_register_reg(vgpu,
+			PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+			VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
+			&intel_vgpu_regops_opregion, OPREGION_SIZE,
+			VFIO_REGION_INFO_FLAG_READ, base);
+
+	return ret;
+}
+
+int intel_gvt_set_edid(struct intel_vgpu *vgpu, int port_num)
+{
+	struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num);
+	struct vfio_edid_region *base;
+	int ret;
+
+	base = kzalloc(sizeof(*base), GFP_KERNEL);
+	if (!base)
+		return -ENOMEM;
+
+	/* TODO: Add multi-port and EDID extension block support */
+	base->vfio_edid_regs.edid_offset = EDID_BLOB_OFFSET;
+	base->vfio_edid_regs.edid_max_size = EDID_SIZE;
+	base->vfio_edid_regs.edid_size = EDID_SIZE;
+	base->vfio_edid_regs.max_xres = vgpu_edid_xres(port->id);
+	base->vfio_edid_regs.max_yres = vgpu_edid_yres(port->id);
+	base->edid_blob = port->edid->edid_block;
+
+	ret = intel_vgpu_register_reg(vgpu,
+			VFIO_REGION_TYPE_GFX,
+			VFIO_REGION_SUBTYPE_GFX_EDID,
+			&intel_vgpu_regops_edid, EDID_SIZE,
+			VFIO_REGION_INFO_FLAG_READ |
+			VFIO_REGION_INFO_FLAG_WRITE |
+			VFIO_REGION_INFO_FLAG_CAPS, base);
+
+	return ret;
+}
+
+static void intel_vgpu_dma_unmap(struct vfio_device *vfio_dev, u64 iova,
+				 u64 length)
+{
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+	struct gvt_dma *entry;
+	u64 iov_pfn = iova >> PAGE_SHIFT;
+	u64 end_iov_pfn = iov_pfn + length / PAGE_SIZE;
+
+	mutex_lock(&vgpu->cache_lock);
+	for (; iov_pfn < end_iov_pfn; iov_pfn++) {
+		entry = __gvt_cache_find_gfn(vgpu, iov_pfn);
+		if (!entry)
+			continue;
+
+		gvt_dma_unmap_page(vgpu, entry->gfn, entry->dma_addr,
+				   entry->size);
+		__gvt_cache_remove_entry(vgpu, entry);
+	}
+	mutex_unlock(&vgpu->cache_lock);
+}
+
+static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu *itr;
+	int id;
+	bool ret = false;
+
+	mutex_lock(&vgpu->gvt->lock);
+	for_each_active_vgpu(vgpu->gvt, itr, id) {
+		if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, itr->status))
+			continue;
+
+		if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
+			ret = true;
+			goto out;
+		}
+	}
+out:
+	mutex_unlock(&vgpu->gvt->lock);
+	return ret;
+}
+
+static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
+{
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+	int ret;
+
+	if (__kvmgt_vgpu_exist(vgpu))
+		return -EEXIST;
+
+	vgpu->track_node.track_write = kvmgt_page_track_write;
+	vgpu->track_node.track_remove_region = kvmgt_page_track_remove_region;
+	ret = kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+					       &vgpu->track_node);
+	if (ret) {
+		gvt_vgpu_err("KVM is required to use Intel vGPU\n");
+		return ret;
+	}
+
+	set_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
+
+	debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
+			     &vgpu->nr_cache_entries);
+
+	intel_gvt_activate_vgpu(vgpu);
+
+	return 0;
+}
+
+static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
+{
+	struct eventfd_ctx *trigger;
+
+	trigger = vgpu->msi_trigger;
+	if (trigger) {
+		eventfd_ctx_put(trigger);
+		vgpu->msi_trigger = NULL;
+	}
+}
+
+static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
+{
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+
+	intel_gvt_release_vgpu(vgpu);
+
+	clear_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status);
+
+	debugfs_lookup_and_remove(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs);
+
+	kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
+					   &vgpu->track_node);
+
+	kvmgt_protect_table_destroy(vgpu);
+	gvt_cache_destroy(vgpu);
+
+	WARN_ON(vgpu->nr_cache_entries);
+
+	vgpu->gfn_cache = RB_ROOT;
+	vgpu->dma_addr_cache = RB_ROOT;
+
+	intel_vgpu_release_msi_eventfd_ctx(vgpu);
+}
+
+static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
+{
+	u32 start_lo, start_hi;
+	u32 mem_type;
+
+	start_lo = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
+			PCI_BASE_ADDRESS_MEM_MASK;
+	mem_type = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space + bar)) &
+			PCI_BASE_ADDRESS_MEM_TYPE_MASK;
+
+	switch (mem_type) {
+	case PCI_BASE_ADDRESS_MEM_TYPE_64:
+		start_hi = (*(u32 *)(vgpu->cfg_space.virtual_cfg_space
+						+ bar + 4));
+		break;
+	case PCI_BASE_ADDRESS_MEM_TYPE_32:
+	case PCI_BASE_ADDRESS_MEM_TYPE_1M:
+		/* 1M mem BAR treated as 32-bit BAR */
+	default:
+		/* mem unknown type treated as 32-bit BAR */
+		start_hi = 0;
+		break;
+	}
+
+	return ((u64)start_hi << 32) | start_lo;
+}
+
+static int intel_vgpu_bar_rw(struct intel_vgpu *vgpu, int bar, u64 off,
+			     void *buf, unsigned int count, bool is_write)
+{
+	u64 bar_start = intel_vgpu_get_bar_addr(vgpu, bar);
+	int ret;
+
+	if (is_write)
+		ret = intel_vgpu_emulate_mmio_write(vgpu,
+					bar_start + off, buf, count);
+	else
+		ret = intel_vgpu_emulate_mmio_read(vgpu,
+					bar_start + off, buf, count);
+	return ret;
+}
+
+static inline bool intel_vgpu_in_aperture(struct intel_vgpu *vgpu, u64 off)
+{
+	return off >= vgpu_aperture_offset(vgpu) &&
+	       off < vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu);
+}
+
+static int intel_vgpu_aperture_rw(struct intel_vgpu *vgpu, u64 off,
+		void *buf, unsigned long count, bool is_write)
+{
+	void __iomem *aperture_va;
+
+	if (!intel_vgpu_in_aperture(vgpu, off) ||
+	    !intel_vgpu_in_aperture(vgpu, off + count)) {
+		gvt_vgpu_err("Invalid aperture offset %llu\n", off);
+		return -EINVAL;
+	}
+
+	aperture_va = io_mapping_map_wc(&vgpu->gvt->gt->ggtt->iomap,
+					ALIGN_DOWN(off, PAGE_SIZE),
+					count + offset_in_page(off));
+	if (!aperture_va)
+		return -EIO;
+
+	if (is_write)
+		memcpy_toio(aperture_va + offset_in_page(off), buf, count);
+	else
+		memcpy_fromio(buf, aperture_va + offset_in_page(off), count);
+
+	io_mapping_unmap(aperture_va);
+
+	return 0;
+}
+
+static ssize_t intel_vgpu_rw(struct intel_vgpu *vgpu, char *buf,
+			size_t count, loff_t *ppos, bool is_write)
+{
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+	u64 pos = *ppos & VFIO_PCI_OFFSET_MASK;
+	int ret = -EINVAL;
+
+
+	if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions) {
+		gvt_vgpu_err("invalid index: %u\n", index);
+		return -EINVAL;
+	}
+
+	switch (index) {
+	case VFIO_PCI_CONFIG_REGION_INDEX:
+		if (is_write)
+			ret = intel_vgpu_emulate_cfg_write(vgpu, pos,
+						buf, count);
+		else
+			ret = intel_vgpu_emulate_cfg_read(vgpu, pos,
+						buf, count);
+		break;
+	case VFIO_PCI_BAR0_REGION_INDEX:
+		ret = intel_vgpu_bar_rw(vgpu, PCI_BASE_ADDRESS_0, pos,
+					buf, count, is_write);
+		break;
+	case VFIO_PCI_BAR2_REGION_INDEX:
+		ret = intel_vgpu_aperture_rw(vgpu, pos, buf, count, is_write);
+		break;
+	case VFIO_PCI_BAR1_REGION_INDEX:
+	case VFIO_PCI_BAR3_REGION_INDEX:
+	case VFIO_PCI_BAR4_REGION_INDEX:
+	case VFIO_PCI_BAR5_REGION_INDEX:
+	case VFIO_PCI_VGA_REGION_INDEX:
+	case VFIO_PCI_ROM_REGION_INDEX:
+		break;
+	default:
+		if (index >= VFIO_PCI_NUM_REGIONS + vgpu->num_regions)
+			return -EINVAL;
+
+		index -= VFIO_PCI_NUM_REGIONS;
+		return vgpu->region[index].ops->rw(vgpu, buf, count,
+				ppos, is_write);
+	}
+
+	return ret == 0 ? count : ret;
+}
+
+static bool gtt_entry(struct intel_vgpu *vgpu, loff_t *ppos)
+{
+	unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+	struct intel_gvt *gvt = vgpu->gvt;
+	int offset;
+
+	/* Only allow MMIO GGTT entry access */
+	if (index != PCI_BASE_ADDRESS_0)
+		return false;
+
+	offset = (u64)(*ppos & VFIO_PCI_OFFSET_MASK) -
+		intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+
+	return (offset >= gvt->device_info.gtt_start_offset &&
+		offset < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt)) ?
+			true : false;
+}
+
+static ssize_t intel_vgpu_read(struct vfio_device *vfio_dev, char __user *buf,
+			size_t count, loff_t *ppos)
+{
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+	unsigned int done = 0;
+	int ret;
+
+	while (count) {
+		size_t filled;
+
+		/* Only support GGTT entry 8 bytes read */
+		if (count >= 8 && !(*ppos % 8) &&
+			gtt_entry(vgpu, ppos)) {
+			u64 val;
+
+			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
+					ppos, false);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 8;
+		} else if (count >= 4 && !(*ppos % 4)) {
+			u32 val;
+
+			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
+					ppos, false);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 4;
+		} else if (count >= 2 && !(*ppos % 2)) {
+			u16 val;
+
+			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
+					ppos, false);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 2;
+		} else {
+			u8 val;
+
+			ret = intel_vgpu_rw(vgpu, &val, sizeof(val), ppos,
+					false);
+			if (ret <= 0)
+				goto read_err;
+
+			if (copy_to_user(buf, &val, sizeof(val)))
+				goto read_err;
+
+			filled = 1;
+		}
+
+		count -= filled;
+		done += filled;
+		*ppos += filled;
+		buf += filled;
+	}
+
+	return done;
+
+read_err:
+	return -EFAULT;
+}
+
+static ssize_t intel_vgpu_write(struct vfio_device *vfio_dev,
+				const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+	unsigned int done = 0;
+	int ret;
+
+	while (count) {
+		size_t filled;
+
+		/* Only support GGTT entry 8 bytes write */
+		if (count >= 8 && !(*ppos % 8) &&
+			gtt_entry(vgpu, ppos)) {
+			u64 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
+					ppos, true);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 8;
+		} else if (count >= 4 && !(*ppos % 4)) {
+			u32 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = intel_vgpu_rw(vgpu, (char *)&val, sizeof(val),
+					ppos, true);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 4;
+		} else if (count >= 2 && !(*ppos % 2)) {
+			u16 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = intel_vgpu_rw(vgpu, (char *)&val,
+					sizeof(val), ppos, true);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 2;
+		} else {
+			u8 val;
+
+			if (copy_from_user(&val, buf, sizeof(val)))
+				goto write_err;
+
+			ret = intel_vgpu_rw(vgpu, &val, sizeof(val),
+					ppos, true);
+			if (ret <= 0)
+				goto write_err;
+
+			filled = 1;
+		}
+
+		count -= filled;
+		done += filled;
+		*ppos += filled;
+		buf += filled;
+	}
+
+	return done;
+write_err:
+	return -EFAULT;
+}
+
+static int intel_vgpu_mmap(struct vfio_device *vfio_dev,
+		struct vm_area_struct *vma)
+{
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+	unsigned int index;
+	u64 virtaddr;
+	unsigned long req_size, pgoff, req_start;
+	pgprot_t pg_prot;
+
+	index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
+	if (index >= VFIO_PCI_ROM_REGION_INDEX)
+		return -EINVAL;
+
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+	if ((vma->vm_flags & VM_SHARED) == 0)
+		return -EINVAL;
+	if (index != VFIO_PCI_BAR2_REGION_INDEX)
+		return -EINVAL;
+
+	pg_prot = vma->vm_page_prot;
+	virtaddr = vma->vm_start;
+	req_size = vma->vm_end - vma->vm_start;
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	req_start = pgoff << PAGE_SHIFT;
+
+	if (!intel_vgpu_in_aperture(vgpu, req_start))
+		return -EINVAL;
+	if (req_start + req_size >
+	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
+		return -EINVAL;
+
+	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
+
+	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
+}
+
+static int intel_vgpu_get_irq_count(struct intel_vgpu *vgpu, int type)
+{
+	if (type == VFIO_PCI_INTX_IRQ_INDEX || type == VFIO_PCI_MSI_IRQ_INDEX)
+		return 1;
+
+	return 0;
+}
+
+static int intel_vgpu_set_intx_mask(struct intel_vgpu *vgpu,
+			unsigned int index, unsigned int start,
+			unsigned int count, u32 flags,
+			void *data)
+{
+	return 0;
+}
+
+static int intel_vgpu_set_intx_unmask(struct intel_vgpu *vgpu,
+			unsigned int index, unsigned int start,
+			unsigned int count, u32 flags, void *data)
+{
+	return 0;
+}
+
+static int intel_vgpu_set_intx_trigger(struct intel_vgpu *vgpu,
+		unsigned int index, unsigned int start, unsigned int count,
+		u32 flags, void *data)
+{
+	return 0;
+}
+
+static int intel_vgpu_set_msi_trigger(struct intel_vgpu *vgpu,
+		unsigned int index, unsigned int start, unsigned int count,
+		u32 flags, void *data)
+{
+	struct eventfd_ctx *trigger;
+
+	if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+		int fd = *(int *)data;
+
+		trigger = eventfd_ctx_fdget(fd);
+		if (IS_ERR(trigger)) {
+			gvt_vgpu_err("eventfd_ctx_fdget failed\n");
+			return PTR_ERR(trigger);
+		}
+		vgpu->msi_trigger = trigger;
+	} else if ((flags & VFIO_IRQ_SET_DATA_NONE) && !count)
+		intel_vgpu_release_msi_eventfd_ctx(vgpu);
+
+	return 0;
+}
+
+static int intel_vgpu_set_irqs(struct intel_vgpu *vgpu, u32 flags,
+		unsigned int index, unsigned int start, unsigned int count,
+		void *data)
+{
+	int (*func)(struct intel_vgpu *vgpu, unsigned int index,
+			unsigned int start, unsigned int count, u32 flags,
+			void *data) = NULL;
+
+	switch (index) {
+	case VFIO_PCI_INTX_IRQ_INDEX:
+		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+		case VFIO_IRQ_SET_ACTION_MASK:
+			func = intel_vgpu_set_intx_mask;
+			break;
+		case VFIO_IRQ_SET_ACTION_UNMASK:
+			func = intel_vgpu_set_intx_unmask;
+			break;
+		case VFIO_IRQ_SET_ACTION_TRIGGER:
+			func = intel_vgpu_set_intx_trigger;
+			break;
+		}
+		break;
+	case VFIO_PCI_MSI_IRQ_INDEX:
+		switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) {
+		case VFIO_IRQ_SET_ACTION_MASK:
+		case VFIO_IRQ_SET_ACTION_UNMASK:
+			/* XXX Need masking support exported */
+			break;
+		case VFIO_IRQ_SET_ACTION_TRIGGER:
+			func = intel_vgpu_set_msi_trigger;
+			break;
+		}
+		break;
+	}
+
+	if (!func)
+		return -ENOTTY;
+
+	return func(vgpu, index, start, count, flags, data);
+}
+
+static long intel_vgpu_ioctl(struct vfio_device *vfio_dev, unsigned int cmd,
+			     unsigned long arg)
+{
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+	unsigned long minsz;
+
+	gvt_dbg_core("vgpu%d ioctl, cmd: %d\n", vgpu->id, cmd);
+
+	if (cmd == VFIO_DEVICE_GET_INFO) {
+		struct vfio_device_info info;
+
+		minsz = offsetofend(struct vfio_device_info, num_irqs);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		info.flags = VFIO_DEVICE_FLAGS_PCI;
+		info.flags |= VFIO_DEVICE_FLAGS_RESET;
+		info.num_regions = VFIO_PCI_NUM_REGIONS +
+				vgpu->num_regions;
+		info.num_irqs = VFIO_PCI_NUM_IRQS;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+
+	} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
+		struct vfio_region_info info;
+		struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+		unsigned int i;
+		int ret;
+		struct vfio_region_info_cap_sparse_mmap *sparse = NULL;
+		int nr_areas = 1;
+		int cap_type_id;
+
+		minsz = offsetofend(struct vfio_region_info, offset);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz)
+			return -EINVAL;
+
+		switch (info.index) {
+		case VFIO_PCI_CONFIG_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = vgpu->gvt->device_info.cfg_space_size;
+			info.flags = VFIO_REGION_INFO_FLAG_READ |
+				     VFIO_REGION_INFO_FLAG_WRITE;
+			break;
+		case VFIO_PCI_BAR0_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = vgpu->cfg_space.bar[info.index].size;
+			if (!info.size) {
+				info.flags = 0;
+				break;
+			}
+
+			info.flags = VFIO_REGION_INFO_FLAG_READ |
+				     VFIO_REGION_INFO_FLAG_WRITE;
+			break;
+		case VFIO_PCI_BAR1_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = 0;
+			info.flags = 0;
+			break;
+		case VFIO_PCI_BAR2_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.flags = VFIO_REGION_INFO_FLAG_CAPS |
+					VFIO_REGION_INFO_FLAG_MMAP |
+					VFIO_REGION_INFO_FLAG_READ |
+					VFIO_REGION_INFO_FLAG_WRITE;
+			info.size = gvt_aperture_sz(vgpu->gvt);
+
+			sparse = kzalloc(struct_size(sparse, areas, nr_areas),
+					 GFP_KERNEL);
+			if (!sparse)
+				return -ENOMEM;
+
+			sparse->header.id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+			sparse->header.version = 1;
+			sparse->nr_areas = nr_areas;
+			cap_type_id = VFIO_REGION_INFO_CAP_SPARSE_MMAP;
+			sparse->areas[0].offset =
+					PAGE_ALIGN(vgpu_aperture_offset(vgpu));
+			sparse->areas[0].size = vgpu_aperture_sz(vgpu);
+			break;
+
+		case VFIO_PCI_BAR3_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = 0;
+			info.flags = 0;
+
+			gvt_dbg_core("get region info bar:%d\n", info.index);
+			break;
+
+		case VFIO_PCI_ROM_REGION_INDEX:
+		case VFIO_PCI_VGA_REGION_INDEX:
+			info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+			info.size = 0;
+			info.flags = 0;
+
+			gvt_dbg_core("get region info index:%d\n", info.index);
+			break;
+		default:
+			{
+				struct vfio_region_info_cap_type cap_type = {
+					.header.id = VFIO_REGION_INFO_CAP_TYPE,
+					.header.version = 1 };
+
+				if (info.index >= VFIO_PCI_NUM_REGIONS +
+						vgpu->num_regions)
+					return -EINVAL;
+				info.index =
+					array_index_nospec(info.index,
+							VFIO_PCI_NUM_REGIONS +
+							vgpu->num_regions);
+
+				i = info.index - VFIO_PCI_NUM_REGIONS;
+
+				info.offset =
+					VFIO_PCI_INDEX_TO_OFFSET(info.index);
+				info.size = vgpu->region[i].size;
+				info.flags = vgpu->region[i].flags;
+
+				cap_type.type = vgpu->region[i].type;
+				cap_type.subtype = vgpu->region[i].subtype;
+
+				ret = vfio_info_add_capability(&caps,
+							&cap_type.header,
+							sizeof(cap_type));
+				if (ret)
+					return ret;
+			}
+		}
+
+		if ((info.flags & VFIO_REGION_INFO_FLAG_CAPS) && sparse) {
+			switch (cap_type_id) {
+			case VFIO_REGION_INFO_CAP_SPARSE_MMAP:
+				ret = vfio_info_add_capability(&caps,
+					&sparse->header,
+					struct_size(sparse, areas,
+						    sparse->nr_areas));
+				if (ret) {
+					kfree(sparse);
+					return ret;
+				}
+				break;
+			default:
+				kfree(sparse);
+				return -EINVAL;
+			}
+		}
+
+		if (caps.size) {
+			info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+			if (info.argsz < sizeof(info) + caps.size) {
+				info.argsz = sizeof(info) + caps.size;
+				info.cap_offset = 0;
+			} else {
+				vfio_info_cap_shift(&caps, sizeof(info));
+				if (copy_to_user((void __user *)arg +
+						  sizeof(info), caps.buf,
+						  caps.size)) {
+					kfree(caps.buf);
+					kfree(sparse);
+					return -EFAULT;
+				}
+				info.cap_offset = sizeof(info);
+			}
+
+			kfree(caps.buf);
+		}
+
+		kfree(sparse);
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	} else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
+		struct vfio_irq_info info;
+
+		minsz = offsetofend(struct vfio_irq_info, count);
+
+		if (copy_from_user(&info, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
+			return -EINVAL;
+
+		switch (info.index) {
+		case VFIO_PCI_INTX_IRQ_INDEX:
+		case VFIO_PCI_MSI_IRQ_INDEX:
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		info.flags = VFIO_IRQ_INFO_EVENTFD;
+
+		info.count = intel_vgpu_get_irq_count(vgpu, info.index);
+
+		if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
+			info.flags |= (VFIO_IRQ_INFO_MASKABLE |
+				       VFIO_IRQ_INFO_AUTOMASKED);
+		else
+			info.flags |= VFIO_IRQ_INFO_NORESIZE;
+
+		return copy_to_user((void __user *)arg, &info, minsz) ?
+			-EFAULT : 0;
+	} else if (cmd == VFIO_DEVICE_SET_IRQS) {
+		struct vfio_irq_set hdr;
+		u8 *data = NULL;
+		int ret = 0;
+		size_t data_size = 0;
+
+		minsz = offsetofend(struct vfio_irq_set, count);
+
+		if (copy_from_user(&hdr, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (!(hdr.flags & VFIO_IRQ_SET_DATA_NONE)) {
+			int max = intel_vgpu_get_irq_count(vgpu, hdr.index);
+
+			ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
+						VFIO_PCI_NUM_IRQS, &data_size);
+			if (ret) {
+				gvt_vgpu_err("intel:vfio_set_irqs_validate_and_prepare failed\n");
+				return -EINVAL;
+			}
+			if (data_size) {
+				data = memdup_user((void __user *)(arg + minsz),
+						   data_size);
+				if (IS_ERR(data))
+					return PTR_ERR(data);
+			}
+		}
+
+		ret = intel_vgpu_set_irqs(vgpu, hdr.flags, hdr.index,
+					hdr.start, hdr.count, data);
+		kfree(data);
+
+		return ret;
+	} else if (cmd == VFIO_DEVICE_RESET) {
+		intel_gvt_reset_vgpu(vgpu);
+		return 0;
+	} else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) {
+		struct vfio_device_gfx_plane_info dmabuf;
+		int ret = 0;
+
+		minsz = offsetofend(struct vfio_device_gfx_plane_info,
+				    dmabuf_id);
+		if (copy_from_user(&dmabuf, (void __user *)arg, minsz))
+			return -EFAULT;
+		if (dmabuf.argsz < minsz)
+			return -EINVAL;
+
+		ret = intel_vgpu_query_plane(vgpu, &dmabuf);
+		if (ret != 0)
+			return ret;
+
+		return copy_to_user((void __user *)arg, &dmabuf, minsz) ?
+								-EFAULT : 0;
+	} else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) {
+		__u32 dmabuf_id;
+
+		if (get_user(dmabuf_id, (__u32 __user *)arg))
+			return -EFAULT;
+		return intel_vgpu_get_dmabuf(vgpu, dmabuf_id);
+	}
+
+	return -ENOTTY;
+}
+
+static ssize_t
+vgpu_id_show(struct device *dev, struct device_attribute *attr,
+	     char *buf)
+{
+	struct intel_vgpu *vgpu = dev_get_drvdata(dev);
+
+	return sprintf(buf, "%d\n", vgpu->id);
+}
+
+static DEVICE_ATTR_RO(vgpu_id);
+
+static struct attribute *intel_vgpu_attrs[] = {
+	&dev_attr_vgpu_id.attr,
+	NULL
+};
+
+static const struct attribute_group intel_vgpu_group = {
+	.name = "intel_vgpu",
+	.attrs = intel_vgpu_attrs,
+};
+
+static const struct attribute_group *intel_vgpu_groups[] = {
+	&intel_vgpu_group,
+	NULL,
+};
+
+static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
+{
+	struct mdev_device *mdev = to_mdev_device(vfio_dev->dev);
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+	struct intel_vgpu_type *type =
+		container_of(mdev->type, struct intel_vgpu_type, type);
+	int ret;
+
+	vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
+	ret = intel_gvt_create_vgpu(vgpu, type->conf);
+	if (ret)
+		return ret;
+
+	kvmgt_protect_table_init(vgpu);
+	gvt_cache_init(vgpu);
+
+	return 0;
+}
+
+static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
+{
+	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+
+	intel_gvt_destroy_vgpu(vgpu);
+}
+
+static const struct vfio_device_ops intel_vgpu_dev_ops = {
+	.init		= intel_vgpu_init_dev,
+	.release	= intel_vgpu_release_dev,
+	.open_device	= intel_vgpu_open_device,
+	.close_device	= intel_vgpu_close_device,
+	.read		= intel_vgpu_read,
+	.write		= intel_vgpu_write,
+	.mmap		= intel_vgpu_mmap,
+	.ioctl		= intel_vgpu_ioctl,
+	.dma_unmap	= intel_vgpu_dma_unmap,
+	.bind_iommufd	= vfio_iommufd_emulated_bind,
+	.unbind_iommufd = vfio_iommufd_emulated_unbind,
+	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
+	.detach_ioas	= vfio_iommufd_emulated_detach_ioas,
+};
+
+static int intel_vgpu_probe(struct mdev_device *mdev)
+{
+	struct intel_vgpu *vgpu;
+	int ret;
+
+	vgpu = vfio_alloc_device(intel_vgpu, vfio_device, &mdev->dev,
+				 &intel_vgpu_dev_ops);
+	if (IS_ERR(vgpu)) {
+		gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu));
+		return PTR_ERR(vgpu);
+	}
+
+	dev_set_drvdata(&mdev->dev, vgpu);
+	ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device);
+	if (ret)
+		goto out_put_vdev;
+
+	gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
+		     dev_name(mdev_dev(mdev)));
+	return 0;
+
+out_put_vdev:
+	vfio_put_device(&vgpu->vfio_device);
+	return ret;
+}
+
+static void intel_vgpu_remove(struct mdev_device *mdev)
+{
+	struct intel_vgpu *vgpu = dev_get_drvdata(&mdev->dev);
+
+	vfio_unregister_group_dev(&vgpu->vfio_device);
+	vfio_put_device(&vgpu->vfio_device);
+}
+
+static unsigned int intel_vgpu_get_available(struct mdev_type *mtype)
+{
+	struct intel_vgpu_type *type =
+		container_of(mtype, struct intel_vgpu_type, type);
+	struct intel_gvt *gvt = kdev_to_i915(mtype->parent->dev)->gvt;
+	unsigned int low_gm_avail, high_gm_avail, fence_avail;
+
+	mutex_lock(&gvt->lock);
+	low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE -
+		gvt->gm.vgpu_allocated_low_gm_size;
+	high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE -
+		gvt->gm.vgpu_allocated_high_gm_size;
+	fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
+		gvt->fence.vgpu_allocated_fence_num;
+	mutex_unlock(&gvt->lock);
+
+	return min3(low_gm_avail / type->conf->low_mm,
+		    high_gm_avail / type->conf->high_mm,
+		    fence_avail / type->conf->fence);
+}
+
+static struct mdev_driver intel_vgpu_mdev_driver = {
+	.device_api	= VFIO_DEVICE_API_PCI_STRING,
+	.driver = {
+		.name		= "intel_vgpu_mdev",
+		.owner		= THIS_MODULE,
+		.dev_groups	= intel_vgpu_groups,
+	},
+	.probe			= intel_vgpu_probe,
+	.remove			= intel_vgpu_remove,
+	.get_available		= intel_vgpu_get_available,
+	.show_description	= intel_vgpu_show_description,
+};
+
+int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
+{
+	int r;
+
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
+		return -ESRCH;
+
+	if (kvmgt_gfn_is_write_protected(info, gfn))
+		return 0;
+
+	r = kvm_write_track_add_gfn(info->vfio_device.kvm, gfn);
+	if (r)
+		return r;
+
+	kvmgt_protect_table_add(info, gfn);
+	return 0;
+}
+
+int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
+{
+	int r;
+
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, info->status))
+		return -ESRCH;
+
+	if (!kvmgt_gfn_is_write_protected(info, gfn))
+		return 0;
+
+	r = kvm_write_track_remove_gfn(info->vfio_device.kvm, gfn);
+	if (r)
+		return r;
+
+	kvmgt_protect_table_del(info, gfn);
+	return 0;
+}
+
+static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len,
+				   struct kvm_page_track_notifier_node *node)
+{
+	struct intel_vgpu *info =
+		container_of(node, struct intel_vgpu, track_node);
+
+	mutex_lock(&info->vgpu_lock);
+
+	if (kvmgt_gfn_is_write_protected(info, gpa >> PAGE_SHIFT))
+		intel_vgpu_page_track_handler(info, gpa,
+						     (void *)val, len);
+
+	mutex_unlock(&info->vgpu_lock);
+}
+
+static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages,
+					   struct kvm_page_track_notifier_node *node)
+{
+	unsigned long i;
+	struct intel_vgpu *info =
+		container_of(node, struct intel_vgpu, track_node);
+
+	mutex_lock(&info->vgpu_lock);
+
+	for (i = 0; i < nr_pages; i++) {
+		if (kvmgt_gfn_is_write_protected(info, gfn + i))
+			kvmgt_protect_table_del(info, gfn + i);
+	}
+
+	mutex_unlock(&info->vgpu_lock);
+}
+
+void intel_vgpu_detach_regions(struct intel_vgpu *vgpu)
+{
+	int i;
+
+	if (!vgpu->region)
+		return;
+
+	for (i = 0; i < vgpu->num_regions; i++)
+		if (vgpu->region[i].ops->release)
+			vgpu->region[i].ops->release(vgpu,
+					&vgpu->region[i]);
+	vgpu->num_regions = 0;
+	kfree(vgpu->region);
+	vgpu->region = NULL;
+}
+
+int intel_gvt_dma_map_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
+		unsigned long size, dma_addr_t *dma_addr)
+{
+	struct gvt_dma *entry;
+	int ret;
+
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+		return -EINVAL;
+
+	mutex_lock(&vgpu->cache_lock);
+
+	entry = __gvt_cache_find_gfn(vgpu, gfn);
+	if (!entry) {
+		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
+		if (ret)
+			goto err_unlock;
+
+		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
+		if (ret)
+			goto err_unmap;
+	} else if (entry->size != size) {
+		/* the same gfn with different size: unmap and re-map */
+		gvt_dma_unmap_page(vgpu, gfn, entry->dma_addr, entry->size);
+		__gvt_cache_remove_entry(vgpu, entry);
+
+		ret = gvt_dma_map_page(vgpu, gfn, dma_addr, size);
+		if (ret)
+			goto err_unlock;
+
+		ret = __gvt_cache_add(vgpu, gfn, *dma_addr, size);
+		if (ret)
+			goto err_unmap;
+	} else {
+		kref_get(&entry->ref);
+		*dma_addr = entry->dma_addr;
+	}
+
+	mutex_unlock(&vgpu->cache_lock);
+	return 0;
+
+err_unmap:
+	gvt_dma_unmap_page(vgpu, gfn, *dma_addr, size);
+err_unlock:
+	mutex_unlock(&vgpu->cache_lock);
+	return ret;
+}
+
+int intel_gvt_dma_pin_guest_page(struct intel_vgpu *vgpu, dma_addr_t dma_addr)
+{
+	struct gvt_dma *entry;
+	int ret = 0;
+
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+		return -EINVAL;
+
+	mutex_lock(&vgpu->cache_lock);
+	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
+	if (entry)
+		kref_get(&entry->ref);
+	else
+		ret = -ENOMEM;
+	mutex_unlock(&vgpu->cache_lock);
+
+	return ret;
+}
+
+static void __gvt_dma_release(struct kref *ref)
+{
+	struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
+
+	gvt_dma_unmap_page(entry->vgpu, entry->gfn, entry->dma_addr,
+			   entry->size);
+	__gvt_cache_remove_entry(entry->vgpu, entry);
+}
+
+void intel_gvt_dma_unmap_guest_page(struct intel_vgpu *vgpu,
+		dma_addr_t dma_addr)
+{
+	struct gvt_dma *entry;
+
+	if (!test_bit(INTEL_VGPU_STATUS_ATTACHED, vgpu->status))
+		return;
+
+	mutex_lock(&vgpu->cache_lock);
+	entry = __gvt_cache_find_dma_addr(vgpu, dma_addr);
+	if (entry)
+		kref_put(&entry->ref, __gvt_dma_release);
+	mutex_unlock(&vgpu->cache_lock);
+}
+
+static void init_device_info(struct intel_gvt *gvt)
+{
+	struct intel_gvt_device_info *info = &gvt->device_info;
+	struct pci_dev *pdev = to_pci_dev(gvt->gt->i915->drm.dev);
+
+	info->max_support_vgpus = 8;
+	info->cfg_space_size = PCI_CFG_SPACE_EXP_SIZE;
+	info->mmio_size = 2 * 1024 * 1024;
+	info->mmio_bar = 0;
+	info->gtt_start_offset = 8 * 1024 * 1024;
+	info->gtt_entry_size = 8;
+	info->gtt_entry_size_shift = 3;
+	info->gmadr_bytes_in_cmd = 8;
+	info->max_surface_size = 36 * 1024 * 1024;
+	info->msi_cap_offset = pdev->msi_cap;
+}
+
+static void intel_gvt_test_and_emulate_vblank(struct intel_gvt *gvt)
+{
+	struct intel_vgpu *vgpu;
+	int id;
+
+	mutex_lock(&gvt->lock);
+	idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) {
+		if (test_and_clear_bit(INTEL_GVT_REQUEST_EMULATE_VBLANK + id,
+				       (void *)&gvt->service_request)) {
+			if (test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status))
+				intel_vgpu_emulate_vblank(vgpu);
+		}
+	}
+	mutex_unlock(&gvt->lock);
+}
+
+static int gvt_service_thread(void *data)
+{
+	struct intel_gvt *gvt = (struct intel_gvt *)data;
+	int ret;
+
+	gvt_dbg_core("service thread start\n");
+
+	while (!kthread_should_stop()) {
+		ret = wait_event_interruptible(gvt->service_thread_wq,
+				kthread_should_stop() || gvt->service_request);
+
+		if (kthread_should_stop())
+			break;
+
+		if (WARN_ONCE(ret, "service thread is waken up by signal.\n"))
+			continue;
+
+		intel_gvt_test_and_emulate_vblank(gvt);
+
+		if (test_bit(INTEL_GVT_REQUEST_SCHED,
+				(void *)&gvt->service_request) ||
+			test_bit(INTEL_GVT_REQUEST_EVENT_SCHED,
+					(void *)&gvt->service_request)) {
+			intel_gvt_schedule(gvt);
+		}
+	}
+
+	return 0;
+}
+
+static void clean_service_thread(struct intel_gvt *gvt)
+{
+	kthread_stop(gvt->service_thread);
+}
+
+static int init_service_thread(struct intel_gvt *gvt)
+{
+	init_waitqueue_head(&gvt->service_thread_wq);
+
+	gvt->service_thread = kthread_run(gvt_service_thread,
+			gvt, "gvt_service_thread");
+	if (IS_ERR(gvt->service_thread)) {
+		gvt_err("fail to start service thread.\n");
+		return PTR_ERR(gvt->service_thread);
+	}
+	return 0;
+}
+
+/**
+ * intel_gvt_clean_device - clean a GVT device
+ * @i915: i915 private
+ *
+ * This function is called at the driver unloading stage, to free the
+ * resources owned by a GVT device.
+ *
+ */
+static void intel_gvt_clean_device(struct drm_i915_private *i915)
+{
+	struct intel_gvt *gvt = fetch_and_zero(&i915->gvt);
+
+	if (drm_WARN_ON(&i915->drm, !gvt))
+		return;
+
+	mdev_unregister_parent(&gvt->parent);
+	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
+	intel_gvt_clean_vgpu_types(gvt);
+
+	intel_gvt_debugfs_clean(gvt);
+	clean_service_thread(gvt);
+	intel_gvt_clean_cmd_parser(gvt);
+	intel_gvt_clean_sched_policy(gvt);
+	intel_gvt_clean_workload_scheduler(gvt);
+	intel_gvt_clean_gtt(gvt);
+	intel_gvt_free_firmware(gvt);
+	intel_gvt_clean_mmio_info(gvt);
+	idr_destroy(&gvt->vgpu_idr);
+
+	kfree(i915->gvt);
+}
+
+/**
+ * intel_gvt_init_device - initialize a GVT device
+ * @i915: drm i915 private data
+ *
+ * This function is called at the initialization stage, to initialize
+ * necessary GVT components.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+static int intel_gvt_init_device(struct drm_i915_private *i915)
+{
+	struct intel_gvt *gvt;
+	struct intel_vgpu *vgpu;
+	int ret;
+
+	if (drm_WARN_ON(&i915->drm, i915->gvt))
+		return -EEXIST;
+
+	gvt = kzalloc(sizeof(struct intel_gvt), GFP_KERNEL);
+	if (!gvt)
+		return -ENOMEM;
+
+	gvt_dbg_core("init gvt device\n");
+
+	idr_init_base(&gvt->vgpu_idr, 1);
+	spin_lock_init(&gvt->scheduler.mmio_context_lock);
+	mutex_init(&gvt->lock);
+	mutex_init(&gvt->sched_lock);
+	gvt->gt = to_gt(i915);
+	i915->gvt = gvt;
+
+	init_device_info(gvt);
+
+	ret = intel_gvt_setup_mmio_info(gvt);
+	if (ret)
+		goto out_clean_idr;
+
+	intel_gvt_init_engine_mmio_context(gvt);
+
+	ret = intel_gvt_load_firmware(gvt);
+	if (ret)
+		goto out_clean_mmio_info;
+
+	ret = intel_gvt_init_irq(gvt);
+	if (ret)
+		goto out_free_firmware;
+
+	ret = intel_gvt_init_gtt(gvt);
+	if (ret)
+		goto out_free_firmware;
+
+	ret = intel_gvt_init_workload_scheduler(gvt);
+	if (ret)
+		goto out_clean_gtt;
+
+	ret = intel_gvt_init_sched_policy(gvt);
+	if (ret)
+		goto out_clean_workload_scheduler;
+
+	ret = intel_gvt_init_cmd_parser(gvt);
+	if (ret)
+		goto out_clean_sched_policy;
+
+	ret = init_service_thread(gvt);
+	if (ret)
+		goto out_clean_cmd_parser;
+
+	ret = intel_gvt_init_vgpu_types(gvt);
+	if (ret)
+		goto out_clean_thread;
+
+	vgpu = intel_gvt_create_idle_vgpu(gvt);
+	if (IS_ERR(vgpu)) {
+		ret = PTR_ERR(vgpu);
+		gvt_err("failed to create idle vgpu\n");
+		goto out_clean_types;
+	}
+	gvt->idle_vgpu = vgpu;
+
+	intel_gvt_debugfs_init(gvt);
+
+	ret = mdev_register_parent(&gvt->parent, i915->drm.dev,
+				   &intel_vgpu_mdev_driver,
+				   gvt->mdev_types, gvt->num_types);
+	if (ret)
+		goto out_destroy_idle_vgpu;
+
+	gvt_dbg_core("gvt device initialization is done\n");
+	return 0;
+
+out_destroy_idle_vgpu:
+	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
+	intel_gvt_debugfs_clean(gvt);
+out_clean_types:
+	intel_gvt_clean_vgpu_types(gvt);
+out_clean_thread:
+	clean_service_thread(gvt);
+out_clean_cmd_parser:
+	intel_gvt_clean_cmd_parser(gvt);
+out_clean_sched_policy:
+	intel_gvt_clean_sched_policy(gvt);
+out_clean_workload_scheduler:
+	intel_gvt_clean_workload_scheduler(gvt);
+out_clean_gtt:
+	intel_gvt_clean_gtt(gvt);
+out_free_firmware:
+	intel_gvt_free_firmware(gvt);
+out_clean_mmio_info:
+	intel_gvt_clean_mmio_info(gvt);
+out_clean_idr:
+	idr_destroy(&gvt->vgpu_idr);
+	kfree(gvt);
+	i915->gvt = NULL;
+	return ret;
+}
+
+static void intel_gvt_pm_resume(struct drm_i915_private *i915)
+{
+	struct intel_gvt *gvt = i915->gvt;
+
+	intel_gvt_restore_fence(gvt);
+	intel_gvt_restore_mmio(gvt);
+	intel_gvt_restore_ggtt(gvt);
+}
+
+static const struct intel_vgpu_ops intel_gvt_vgpu_ops = {
+	.init_device	= intel_gvt_init_device,
+	.clean_device	= intel_gvt_clean_device,
+	.pm_resume	= intel_gvt_pm_resume,
+};
+
+static int __init kvmgt_init(void)
+{
+	int ret;
+
+	ret = intel_gvt_set_ops(&intel_gvt_vgpu_ops);
+	if (ret)
+		return ret;
+
+	ret = mdev_register_driver(&intel_vgpu_mdev_driver);
+	if (ret)
+		intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
+	return ret;
+}
+
+static void __exit kvmgt_exit(void)
+{
+	mdev_unregister_driver(&intel_vgpu_mdev_driver);
+	intel_gvt_clear_ops(&intel_gvt_vgpu_ops);
+}
+
+module_init(kvmgt_init);
+module_exit(kvmgt_exit);
+
+MODULE_LICENSE("GPL and additional rights");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
new file mode 100644
index 0000000000..5b5def6dde
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Ke Yu
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Dexuan Cui
+ *
+ * Contributors:
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Min He <min.he@intel.com>
+ *    Niu Bing <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gvt.h"
+
+#include "display/intel_dpio_phy.h"
+#include "gt/intel_gt_regs.h"
+
+/**
+ * intel_vgpu_gpa_to_mmio_offset - translate a GPA to MMIO offset
+ * @vgpu: a vGPU
+ * @gpa: guest physical address
+ *
+ * Returns:
+ * Zero on success, negative error code if failed
+ */
+int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa)
+{
+	u64 gttmmio_gpa = intel_vgpu_get_bar_gpa(vgpu, PCI_BASE_ADDRESS_0);
+	return gpa - gttmmio_gpa;
+}
+
+#define reg_is_mmio(gvt, reg)  \
+	(reg >= 0 && reg < gvt->device_info.mmio_size)
+
+#define reg_is_gtt(gvt, reg)   \
+	(reg >= gvt->device_info.gtt_start_offset \
+	 && reg < gvt->device_info.gtt_start_offset + gvt_ggtt_sz(gvt))
+
+static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, u64 pa,
+		void *p_data, unsigned int bytes, bool read)
+{
+	struct intel_gvt *gvt = NULL;
+	void *pt = NULL;
+	unsigned int offset = 0;
+
+	if (!vgpu || !p_data)
+		return;
+
+	gvt = vgpu->gvt;
+	mutex_lock(&vgpu->vgpu_lock);
+	offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);
+	if (reg_is_mmio(gvt, offset)) {
+		if (read)
+			intel_vgpu_default_mmio_read(vgpu, offset, p_data,
+					bytes);
+		else
+			intel_vgpu_default_mmio_write(vgpu, offset, p_data,
+					bytes);
+	} else if (reg_is_gtt(gvt, offset)) {
+		offset -= gvt->device_info.gtt_start_offset;
+		pt = vgpu->gtt.ggtt_mm->ggtt_mm.virtual_ggtt + offset;
+		if (read)
+			memcpy(p_data, pt, bytes);
+		else
+			memcpy(pt, p_data, bytes);
+
+	}
+	mutex_unlock(&vgpu->vgpu_lock);
+}
+
+/**
+ * intel_vgpu_emulate_mmio_read - emulate MMIO read
+ * @vgpu: a vGPU
+ * @pa: guest physical address
+ * @p_data: data return buffer
+ * @bytes: access data length
+ *
+ * Returns:
+ * Zero on success, negative error code if failed
+ */
+int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa,
+		void *p_data, unsigned int bytes)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *i915 = gvt->gt->i915;
+	unsigned int offset = 0;
+	int ret = -EINVAL;
+
+	if (vgpu->failsafe) {
+		failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true);
+		return 0;
+	}
+	mutex_lock(&vgpu->vgpu_lock);
+
+	offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);
+
+	if (drm_WARN_ON(&i915->drm, bytes > 8))
+		goto err;
+
+	if (reg_is_gtt(gvt, offset)) {
+		if (drm_WARN_ON(&i915->drm, !IS_ALIGNED(offset, 4) &&
+				!IS_ALIGNED(offset, 8)))
+			goto err;
+		if (drm_WARN_ON(&i915->drm, bytes != 4 && bytes != 8))
+			goto err;
+		if (drm_WARN_ON(&i915->drm,
+				!reg_is_gtt(gvt, offset + bytes - 1)))
+			goto err;
+
+		ret = intel_vgpu_emulate_ggtt_mmio_read(vgpu, offset,
+				p_data, bytes);
+		if (ret)
+			goto err;
+		goto out;
+	}
+
+	if (drm_WARN_ON_ONCE(&i915->drm, !reg_is_mmio(gvt, offset))) {
+		ret = intel_gvt_read_gpa(vgpu, pa, p_data, bytes);
+		goto out;
+	}
+
+	if (drm_WARN_ON(&i915->drm, !reg_is_mmio(gvt, offset + bytes - 1)))
+		goto err;
+
+	if (!intel_gvt_mmio_is_unalign(gvt, offset)) {
+		if (drm_WARN_ON(&i915->drm, !IS_ALIGNED(offset, bytes)))
+			goto err;
+	}
+
+	ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, true);
+	if (ret < 0)
+		goto err;
+
+	intel_gvt_mmio_set_accessed(gvt, offset);
+	ret = 0;
+	goto out;
+
+err:
+	gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n",
+			offset, bytes);
+out:
+	mutex_unlock(&vgpu->vgpu_lock);
+	return ret;
+}
+
+/**
+ * intel_vgpu_emulate_mmio_write - emulate MMIO write
+ * @vgpu: a vGPU
+ * @pa: guest physical address
+ * @p_data: write data buffer
+ * @bytes: access data length
+ *
+ * Returns:
+ * Zero on success, negative error code if failed
+ */
+int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa,
+		void *p_data, unsigned int bytes)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *i915 = gvt->gt->i915;
+	unsigned int offset = 0;
+	int ret = -EINVAL;
+
+	if (vgpu->failsafe) {
+		failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, false);
+		return 0;
+	}
+
+	mutex_lock(&vgpu->vgpu_lock);
+
+	offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa);
+
+	if (drm_WARN_ON(&i915->drm, bytes > 8))
+		goto err;
+
+	if (reg_is_gtt(gvt, offset)) {
+		if (drm_WARN_ON(&i915->drm, !IS_ALIGNED(offset, 4) &&
+				!IS_ALIGNED(offset, 8)))
+			goto err;
+		if (drm_WARN_ON(&i915->drm, bytes != 4 && bytes != 8))
+			goto err;
+		if (drm_WARN_ON(&i915->drm,
+				!reg_is_gtt(gvt, offset + bytes - 1)))
+			goto err;
+
+		ret = intel_vgpu_emulate_ggtt_mmio_write(vgpu, offset,
+				p_data, bytes);
+		if (ret)
+			goto err;
+		goto out;
+	}
+
+	if (drm_WARN_ON_ONCE(&i915->drm, !reg_is_mmio(gvt, offset))) {
+		ret = intel_gvt_write_gpa(vgpu, pa, p_data, bytes);
+		goto out;
+	}
+
+	ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, false);
+	if (ret < 0)
+		goto err;
+
+	intel_gvt_mmio_set_accessed(gvt, offset);
+	ret = 0;
+	goto out;
+err:
+	gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset,
+		     bytes);
+out:
+	mutex_unlock(&vgpu->vgpu_lock);
+	return ret;
+}
+
+
+/**
+ * intel_vgpu_reset_mmio - reset virtual MMIO space
+ * @vgpu: a vGPU
+ * @dmlr: whether this is device model level reset
+ */
+void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	const struct intel_gvt_device_info *info = &gvt->device_info;
+	void  *mmio = gvt->firmware.mmio;
+
+	if (dmlr) {
+		memcpy(vgpu->mmio.vreg, mmio, info->mmio_size);
+
+		vgpu_vreg_t(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0;
+
+		/* set the bit 0:2(Core C-State ) to C0 */
+		vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0;
+
+		/* uc reset hw expect GS_MIA_IN_RESET */
+		vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET;
+
+		if (IS_BROXTON(vgpu->gvt->gt->i915)) {
+			vgpu_vreg_t(vgpu, BXT_P_CR_GT_DISP_PWRON) &=
+				    ~(BIT(0) | BIT(1));
+			vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY0)) &=
+				    ~PHY_POWER_GOOD;
+			vgpu_vreg_t(vgpu, BXT_PORT_CL1CM_DW0(DPIO_PHY1)) &=
+				    ~PHY_POWER_GOOD;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY0)) &=
+				    ~BIT(30);
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL_FAMILY(DPIO_PHY1)) &=
+				    ~BIT(30);
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_A)) &=
+				    ~BXT_PHY_LANE_ENABLED;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_A)) |=
+				    BXT_PHY_CMNLANE_POWERDOWN_ACK |
+				    BXT_PHY_LANE_POWERDOWN_ACK;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_B)) &=
+				    ~BXT_PHY_LANE_ENABLED;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_B)) |=
+				    BXT_PHY_CMNLANE_POWERDOWN_ACK |
+				    BXT_PHY_LANE_POWERDOWN_ACK;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) &=
+				    ~BXT_PHY_LANE_ENABLED;
+			vgpu_vreg_t(vgpu, BXT_PHY_CTL(PORT_C)) |=
+				    BXT_PHY_CMNLANE_POWERDOWN_ACK |
+				    BXT_PHY_LANE_POWERDOWN_ACK;
+			vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |=
+				SKL_FUSE_DOWNLOAD_STATUS |
+				SKL_FUSE_PG_DIST_STATUS(SKL_PG0) |
+				SKL_FUSE_PG_DIST_STATUS(SKL_PG1) |
+				SKL_FUSE_PG_DIST_STATUS(SKL_PG2);
+		}
+	} else {
+#define GVT_GEN8_MMIO_RESET_OFFSET		(0x44200)
+		/* only reset the engine related, so starting with 0x44200
+		 * interrupt include DE,display mmio related will not be
+		 * touched
+		 */
+		memcpy(vgpu->mmio.vreg, mmio, GVT_GEN8_MMIO_RESET_OFFSET);
+	}
+
+}
+
+/**
+ * intel_vgpu_init_mmio - init MMIO  space
+ * @vgpu: a vGPU
+ *
+ * Returns:
+ * Zero on success, negative error code if failed
+ */
+int intel_vgpu_init_mmio(struct intel_vgpu *vgpu)
+{
+	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
+
+	vgpu->mmio.vreg = vzalloc(info->mmio_size);
+	if (!vgpu->mmio.vreg)
+		return -ENOMEM;
+
+	intel_vgpu_reset_mmio(vgpu, true);
+
+	return 0;
+}
+
+/**
+ * intel_vgpu_clean_mmio - clean MMIO space
+ * @vgpu: a vGPU
+ *
+ */
+void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu)
+{
+	vfree(vgpu->mmio.vreg);
+	vgpu->mmio.vreg = NULL;
+}
diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h
new file mode 100644
index 0000000000..bba154e387
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Ke Yu
+ *    Kevin Tian <kevin.tian@intel.com>
+ *    Dexuan Cui
+ *
+ * Contributors:
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Min He <min.he@intel.com>
+ *    Niu Bing <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#ifndef _GVT_MMIO_H_
+#define _GVT_MMIO_H_
+
+#include <linux/types.h>
+
+struct intel_gvt;
+struct intel_vgpu;
+
+#define D_BDW   (1 << 0)
+#define D_SKL	(1 << 1)
+#define D_KBL	(1 << 2)
+#define D_BXT	(1 << 3)
+#define D_CFL	(1 << 4)
+
+#define D_GEN9PLUS	(D_SKL | D_KBL | D_BXT | D_CFL)
+#define D_GEN8PLUS	(D_BDW | D_SKL | D_KBL | D_BXT | D_CFL)
+
+#define D_SKL_PLUS	(D_SKL | D_KBL | D_BXT | D_CFL)
+#define D_BDW_PLUS	(D_BDW | D_SKL | D_KBL | D_BXT | D_CFL)
+
+#define D_PRE_SKL	(D_BDW)
+#define D_ALL		(D_BDW | D_SKL | D_KBL | D_BXT | D_CFL)
+
+typedef int (*gvt_mmio_func)(struct intel_vgpu *, unsigned int, void *,
+			     unsigned int);
+
+struct intel_gvt_mmio_info {
+	u32 offset;
+	u64 ro_mask;
+	u32 device;
+	gvt_mmio_func read;
+	gvt_mmio_func write;
+	u32 addr_range;
+	struct hlist_node node;
+};
+
+const struct intel_engine_cs *
+intel_gvt_render_mmio_to_engine(struct intel_gvt *gvt, unsigned int reg);
+unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt);
+
+int intel_gvt_setup_mmio_info(struct intel_gvt *gvt);
+void intel_gvt_clean_mmio_info(struct intel_gvt *gvt);
+int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt,
+	int (*handler)(struct intel_gvt *gvt, u32 offset, void *data),
+	void *data);
+
+struct intel_gvt_mmio_info *intel_gvt_find_mmio_info(struct intel_gvt *gvt,
+						  unsigned int offset);
+
+int intel_vgpu_init_mmio(struct intel_vgpu *vgpu);
+void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr);
+void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu);
+
+int intel_vgpu_gpa_to_mmio_offset(struct intel_vgpu *vgpu, u64 gpa);
+
+int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, u64 pa,
+				void *p_data, unsigned int bytes);
+int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, u64 pa,
+				void *p_data, unsigned int bytes);
+
+int intel_vgpu_default_mmio_read(struct intel_vgpu *vgpu, unsigned int offset,
+				 void *p_data, unsigned int bytes);
+int intel_vgpu_default_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+				  void *p_data, unsigned int bytes);
+
+bool intel_gvt_in_force_nonpriv_whitelist(struct intel_gvt *gvt,
+					  unsigned int offset);
+
+int intel_vgpu_mmio_reg_rw(struct intel_vgpu *vgpu, unsigned int offset,
+			   void *pdata, unsigned int bytes, bool is_read);
+
+int intel_vgpu_mask_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
+				  void *p_data, unsigned int bytes);
+
+void intel_gvt_restore_fence(struct intel_gvt *gvt);
+void intel_gvt_restore_mmio(struct intel_gvt *gvt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
new file mode 100644
index 0000000000..490e8ae512
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eddie Dong <eddie.dong@intel.com>
+ *    Kevin Tian <kevin.tian@intel.com>
+ *
+ * Contributors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *    Changbin Du <changbin.du@intel.com>
+ *    Zhenyu Wang <zhenyuw@linux.intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "i915_reg.h"
+#include "gt/intel_context.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_ring.h"
+#include "gvt.h"
+#include "trace.h"
+
+#define GEN9_MOCS_SIZE		64
+
+/* Raw offset is appened to each line for convenience. */
+static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = {
+	{RCS0, RING_MODE_GEN7(RENDER_RING_BASE), 0xffff, false}, /* 0x229c */
+	{RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
+	{RCS0, HWSTAM, 0x0, false}, /* 0x2098 */
+	{RCS0, INSTPM, 0xffff, true}, /* 0x20c0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */
+	{RCS0, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */
+	{RCS0, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */
+	{RCS0, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */
+	{RCS0, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */
+	{RCS0, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */
+	{RCS0, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */
+
+	{BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
+	{BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */
+	{BCS0, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */
+	{BCS0, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */
+	{BCS0, RING_EXCC(BLT_RING_BASE), 0xffff, false}, /* 0x22028 */
+	{RCS0, INVALID_MMIO_REG, 0, false } /* Terminated */
+};
+
+static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = {
+	{RCS0, RING_MODE_GEN7(RENDER_RING_BASE), 0xffff, false}, /* 0x229c */
+	{RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
+	{RCS0, HWSTAM, 0x0, false}, /* 0x2098 */
+	{RCS0, INSTPM, 0xffff, true}, /* 0x20c0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */
+	{RCS0, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */
+	{RCS0, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */
+	{RCS0, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */
+	{RCS0, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */
+	{RCS0, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */
+	{RCS0, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */
+	{RCS0, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */
+
+	{RCS0, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */
+	{RCS0, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */
+	{RCS0, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */
+	{RCS0, COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */
+	{RCS0, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */
+	{RCS0, _MMIO(0xb118), 0, false}, /* GEN8_L3SQCREG4 */
+	{RCS0, _MMIO(0xb11c), 0, false}, /* GEN9_SCRATCH1 */
+	{RCS0, GEN9_SCRATCH_LNCF1, 0, false}, /* 0xb008 */
+	{RCS0, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */
+	{RCS0, _MMIO(0xe180), 0xffff, true}, /* HALF_SLICE_CHICKEN2 */
+	{RCS0, _MMIO(0xe184), 0xffff, true}, /* GEN8_HALF_SLICE_CHICKEN3 */
+	{RCS0, _MMIO(0xe188), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN5 */
+	{RCS0, _MMIO(0xe194), 0xffff, true}, /* GEN9_HALF_SLICE_CHICKEN7 */
+	{RCS0, _MMIO(0xe4f0), 0xffff, true}, /* GEN8_ROW_CHICKEN */
+	{RCS0, TRVATTL3PTRDW(0), 0, true}, /* 0x4de0 */
+	{RCS0, TRVATTL3PTRDW(1), 0, true}, /* 0x4de4 */
+	{RCS0, TRNULLDETCT, 0, true}, /* 0x4de8 */
+	{RCS0, TRINVTILEDETCT, 0, true}, /* 0x4dec */
+	{RCS0, TRVADR, 0, true}, /* 0x4df0 */
+	{RCS0, TRTTE, 0, true}, /* 0x4df4 */
+	{RCS0, _MMIO(0x4dfc), 0, true},
+
+	{BCS0, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */
+	{BCS0, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */
+	{BCS0, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */
+	{BCS0, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */
+	{BCS0, RING_EXCC(BLT_RING_BASE), 0xffff, false}, /* 0x22028 */
+
+	{VCS1, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */
+
+	{VECS0, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */
+
+	{RCS0, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */
+	{RCS0, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */
+	{RCS0, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */
+	{RCS0, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */
+
+	{RCS0, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */
+	{RCS0, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */
+	{RCS0, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */
+
+	{RCS0, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */
+	{RCS0, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */
+	{RCS0, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */
+	{RCS0, INVALID_MMIO_REG, 0, false } /* Terminated */
+};
+
+static struct {
+	bool initialized;
+	u32 control_table[I915_NUM_ENGINES][GEN9_MOCS_SIZE];
+	u32 l3cc_table[GEN9_MOCS_SIZE / 2];
+} gen9_render_mocs;
+
+static u32 gen9_mocs_mmio_offset_list[] = {
+	[RCS0]  = 0xc800,
+	[VCS0]  = 0xc900,
+	[VCS1]  = 0xca00,
+	[BCS0]  = 0xcc00,
+	[VECS0] = 0xcb00,
+};
+
+static void load_render_mocs(const struct intel_engine_cs *engine)
+{
+	struct intel_gvt *gvt = engine->i915->gvt;
+	struct intel_uncore *uncore = engine->uncore;
+	u32 cnt = gvt->engine_mmio_list.mocs_mmio_offset_list_cnt;
+	u32 *regs = gvt->engine_mmio_list.mocs_mmio_offset_list;
+	i915_reg_t offset;
+	int ring_id, i;
+
+	/* Platform doesn't have mocs mmios. */
+	if (!regs)
+		return;
+
+	for (ring_id = 0; ring_id < cnt; ring_id++) {
+		if (!HAS_ENGINE(engine->gt, ring_id))
+			continue;
+
+		offset.reg = regs[ring_id];
+		for (i = 0; i < GEN9_MOCS_SIZE; i++) {
+			gen9_render_mocs.control_table[ring_id][i] =
+				intel_uncore_read_fw(uncore, offset);
+			offset.reg += 4;
+		}
+	}
+
+	offset.reg = 0xb020;
+	for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) {
+		gen9_render_mocs.l3cc_table[i] =
+			intel_uncore_read_fw(uncore, offset);
+		offset.reg += 4;
+	}
+	gen9_render_mocs.initialized = true;
+}
+
+static int
+restore_context_mmio_for_inhibit(struct intel_vgpu *vgpu,
+				 struct i915_request *req)
+{
+	u32 *cs;
+	int ret;
+	struct engine_mmio *mmio;
+	struct intel_gvt *gvt = vgpu->gvt;
+	int ring_id = req->engine->id;
+	int count = gvt->engine_mmio_list.ctx_mmio_count[ring_id];
+
+	if (count == 0)
+		return 0;
+
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
+	if (ret)
+		return ret;
+
+	cs = intel_ring_begin(req, count * 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(count);
+	for (mmio = gvt->engine_mmio_list.mmio;
+	     i915_mmio_reg_valid(mmio->reg); mmio++) {
+		if (mmio->id != ring_id || !mmio->in_context)
+			continue;
+
+		*cs++ = i915_mmio_reg_offset(mmio->reg);
+		*cs++ = vgpu_vreg_t(vgpu, mmio->reg) | (mmio->mask << 16);
+		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
+			      *(cs-2), *(cs-1), vgpu->id, ring_id);
+	}
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	ret = req->engine->emit_flush(req, EMIT_BARRIER);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int
+restore_render_mocs_control_for_inhibit(struct intel_vgpu *vgpu,
+					struct i915_request *req)
+{
+	unsigned int index;
+	u32 *cs;
+
+	cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE);
+
+	for (index = 0; index < GEN9_MOCS_SIZE; index++) {
+		*cs++ = i915_mmio_reg_offset(GEN9_GFX_MOCS(index));
+		*cs++ = vgpu_vreg_t(vgpu, GEN9_GFX_MOCS(index));
+		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
+			      *(cs-2), *(cs-1), vgpu->id, req->engine->id);
+
+	}
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	return 0;
+}
+
+static int
+restore_render_mocs_l3cc_for_inhibit(struct intel_vgpu *vgpu,
+				     struct i915_request *req)
+{
+	unsigned int index;
+	u32 *cs;
+
+	cs = intel_ring_begin(req, 2 * GEN9_MOCS_SIZE / 2 + 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(GEN9_MOCS_SIZE / 2);
+
+	for (index = 0; index < GEN9_MOCS_SIZE / 2; index++) {
+		*cs++ = i915_mmio_reg_offset(GEN9_LNCFCMOCS(index));
+		*cs++ = vgpu_vreg_t(vgpu, GEN9_LNCFCMOCS(index));
+		gvt_dbg_core("add lri reg pair 0x%x:0x%x in inhibit ctx, vgpu:%d, rind_id:%d\n",
+			      *(cs-2), *(cs-1), vgpu->id, req->engine->id);
+
+	}
+
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	return 0;
+}
+
+/*
+ * Use lri command to initialize the mmio which is in context state image for
+ * inhibit context, it contains tracked engine mmio, render_mocs and
+ * render_mocs_l3cc.
+ */
+int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
+				       struct i915_request *req)
+{
+	int ret;
+	u32 *cs;
+
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	ret = restore_context_mmio_for_inhibit(vgpu, req);
+	if (ret)
+		goto out;
+
+	/* no MOCS register in context except render engine */
+	if (req->engine->id != RCS0)
+		goto out;
+
+	ret = restore_render_mocs_control_for_inhibit(vgpu, req);
+	if (ret)
+		goto out;
+
+	ret = restore_render_mocs_l3cc_for_inhibit(vgpu, req);
+	if (ret)
+		goto out;
+
+out:
+	cs = intel_ring_begin(req, 2);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+	*cs++ = MI_NOOP;
+	intel_ring_advance(req, cs);
+
+	return ret;
+}
+
+static u32 gen8_tlb_mmio_offset_list[] = {
+	[RCS0]  = 0x4260,
+	[VCS0]  = 0x4264,
+	[VCS1]  = 0x4268,
+	[BCS0]  = 0x426c,
+	[VECS0] = 0x4270,
+};
+
+static void handle_tlb_pending_event(struct intel_vgpu *vgpu,
+				     const struct intel_engine_cs *engine)
+{
+	struct intel_uncore *uncore = engine->uncore;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	u32 *regs = vgpu->gvt->engine_mmio_list.tlb_mmio_offset_list;
+	u32 cnt = vgpu->gvt->engine_mmio_list.tlb_mmio_offset_list_cnt;
+	enum forcewake_domains fw;
+	i915_reg_t reg;
+
+	if (!regs)
+		return;
+
+	if (drm_WARN_ON(&engine->i915->drm, engine->id >= cnt))
+		return;
+
+	if (!test_and_clear_bit(engine->id, (void *)s->tlb_handle_pending))
+		return;
+
+	reg = _MMIO(regs[engine->id]);
+
+	/* WaForceWakeRenderDuringMmioTLBInvalidate:skl
+	 * we need to put a forcewake when invalidating RCS TLB caches,
+	 * otherwise device can go to RC6 state and interrupt invalidation
+	 * process
+	 */
+	fw = intel_uncore_forcewake_for_reg(uncore, reg,
+					    FW_REG_READ | FW_REG_WRITE);
+	if (engine->id == RCS0 && GRAPHICS_VER(engine->i915) >= 9)
+		fw |= FORCEWAKE_RENDER;
+
+	intel_uncore_forcewake_get(uncore, fw);
+
+	intel_uncore_write_fw(uncore, reg, 0x1);
+
+	if (wait_for_atomic(intel_uncore_read_fw(uncore, reg) == 0, 50))
+		gvt_vgpu_err("timeout in invalidate ring %s tlb\n",
+			     engine->name);
+	else
+		vgpu_vreg_t(vgpu, reg) = 0;
+
+	intel_uncore_forcewake_put(uncore, fw);
+
+	gvt_dbg_core("invalidate TLB for ring %s\n", engine->name);
+}
+
+static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next,
+			const struct intel_engine_cs *engine)
+{
+	u32 regs[] = {
+		[RCS0]  = 0xc800,
+		[VCS0]  = 0xc900,
+		[VCS1]  = 0xca00,
+		[BCS0]  = 0xcc00,
+		[VECS0] = 0xcb00,
+	};
+	struct intel_uncore *uncore = engine->uncore;
+	i915_reg_t offset, l3_offset;
+	u32 old_v, new_v;
+	int i;
+
+	if (drm_WARN_ON(&engine->i915->drm, engine->id >= ARRAY_SIZE(regs)))
+		return;
+
+	if (engine->id == RCS0 && GRAPHICS_VER(engine->i915) == 9)
+		return;
+
+	if (!pre && !gen9_render_mocs.initialized)
+		load_render_mocs(engine);
+
+	offset.reg = regs[engine->id];
+	for (i = 0; i < GEN9_MOCS_SIZE; i++) {
+		if (pre)
+			old_v = vgpu_vreg_t(pre, offset);
+		else
+			old_v = gen9_render_mocs.control_table[engine->id][i];
+		if (next)
+			new_v = vgpu_vreg_t(next, offset);
+		else
+			new_v = gen9_render_mocs.control_table[engine->id][i];
+
+		if (old_v != new_v)
+			intel_uncore_write_fw(uncore, offset, new_v);
+
+		offset.reg += 4;
+	}
+
+	if (engine->id == RCS0) {
+		l3_offset.reg = 0xb020;
+		for (i = 0; i < GEN9_MOCS_SIZE / 2; i++) {
+			if (pre)
+				old_v = vgpu_vreg_t(pre, l3_offset);
+			else
+				old_v = gen9_render_mocs.l3cc_table[i];
+			if (next)
+				new_v = vgpu_vreg_t(next, l3_offset);
+			else
+				new_v = gen9_render_mocs.l3cc_table[i];
+
+			if (old_v != new_v)
+				intel_uncore_write_fw(uncore, l3_offset, new_v);
+
+			l3_offset.reg += 4;
+		}
+	}
+}
+
+#define CTX_CONTEXT_CONTROL_VAL	0x03
+
+bool is_inhibit_context(struct intel_context *ce)
+{
+	const u32 *reg_state = ce->lrc_reg_state;
+	u32 inhibit_mask =
+		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
+
+	return inhibit_mask ==
+		(reg_state[CTX_CONTEXT_CONTROL_VAL] & inhibit_mask);
+}
+
+/* Switch ring mmio values (context). */
+static void switch_mmio(struct intel_vgpu *pre,
+			struct intel_vgpu *next,
+			const struct intel_engine_cs *engine)
+{
+	struct intel_uncore *uncore = engine->uncore;
+	struct intel_vgpu_submission *s;
+	struct engine_mmio *mmio;
+	u32 old_v, new_v;
+
+	if (GRAPHICS_VER(engine->i915) >= 9)
+		switch_mocs(pre, next, engine);
+
+	for (mmio = engine->i915->gvt->engine_mmio_list.mmio;
+	     i915_mmio_reg_valid(mmio->reg); mmio++) {
+		if (mmio->id != engine->id)
+			continue;
+		/*
+		 * No need to do save or restore of the mmio which is in context
+		 * state image on gen9, it's initialized by lri command and
+		 * save or restore with context together.
+		 */
+		if (GRAPHICS_VER(engine->i915) == 9 && mmio->in_context)
+			continue;
+
+		// save
+		if (pre) {
+			vgpu_vreg_t(pre, mmio->reg) =
+				intel_uncore_read_fw(uncore, mmio->reg);
+			if (mmio->mask)
+				vgpu_vreg_t(pre, mmio->reg) &=
+					~(mmio->mask << 16);
+			old_v = vgpu_vreg_t(pre, mmio->reg);
+		} else {
+			old_v = mmio->value =
+				intel_uncore_read_fw(uncore, mmio->reg);
+		}
+
+		// restore
+		if (next) {
+			s = &next->submission;
+			/*
+			 * No need to restore the mmio which is in context state
+			 * image if it's not inhibit context, it will restore
+			 * itself.
+			 */
+			if (mmio->in_context &&
+			    !is_inhibit_context(s->shadow[engine->id]))
+				continue;
+
+			if (mmio->mask)
+				new_v = vgpu_vreg_t(next, mmio->reg) |
+					(mmio->mask << 16);
+			else
+				new_v = vgpu_vreg_t(next, mmio->reg);
+		} else {
+			if (mmio->in_context)
+				continue;
+			if (mmio->mask)
+				new_v = mmio->value | (mmio->mask << 16);
+			else
+				new_v = mmio->value;
+		}
+
+		intel_uncore_write_fw(uncore, mmio->reg, new_v);
+
+		trace_render_mmio(pre ? pre->id : 0,
+				  next ? next->id : 0,
+				  "switch",
+				  i915_mmio_reg_offset(mmio->reg),
+				  old_v, new_v);
+	}
+
+	if (next)
+		handle_tlb_pending_event(next, engine);
+}
+
+/**
+ * intel_gvt_switch_mmio - switch mmio context of specific engine
+ * @pre: the last vGPU that own the engine
+ * @next: the vGPU to switch to
+ * @engine: the engine
+ *
+ * If pre is null indicates that host own the engine. If next is null
+ * indicates that we are switching to host workload.
+ */
+void intel_gvt_switch_mmio(struct intel_vgpu *pre,
+			   struct intel_vgpu *next,
+			   const struct intel_engine_cs *engine)
+{
+	if (WARN(!pre && !next, "switch ring %s from host to HOST\n",
+		 engine->name))
+		return;
+
+	gvt_dbg_render("switch ring %s from %s to %s\n", engine->name,
+		       pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
+
+	/**
+	 * We are using raw mmio access wrapper to improve the
+	 * performace for batch mmio read/write, so we need
+	 * handle forcewake mannually.
+	 */
+	intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL);
+	switch_mmio(pre, next, engine);
+	intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL);
+}
+
+/**
+ * intel_gvt_init_engine_mmio_context - Initiate the engine mmio list
+ * @gvt: GVT device
+ *
+ */
+void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt)
+{
+	struct engine_mmio *mmio;
+
+	if (GRAPHICS_VER(gvt->gt->i915) >= 9) {
+		gvt->engine_mmio_list.mmio = gen9_engine_mmio_list;
+		gvt->engine_mmio_list.tlb_mmio_offset_list = gen8_tlb_mmio_offset_list;
+		gvt->engine_mmio_list.tlb_mmio_offset_list_cnt = ARRAY_SIZE(gen8_tlb_mmio_offset_list);
+		gvt->engine_mmio_list.mocs_mmio_offset_list = gen9_mocs_mmio_offset_list;
+		gvt->engine_mmio_list.mocs_mmio_offset_list_cnt = ARRAY_SIZE(gen9_mocs_mmio_offset_list);
+	} else {
+		gvt->engine_mmio_list.mmio = gen8_engine_mmio_list;
+		gvt->engine_mmio_list.tlb_mmio_offset_list = gen8_tlb_mmio_offset_list;
+		gvt->engine_mmio_list.tlb_mmio_offset_list_cnt = ARRAY_SIZE(gen8_tlb_mmio_offset_list);
+	}
+
+	for (mmio = gvt->engine_mmio_list.mmio;
+	     i915_mmio_reg_valid(mmio->reg); mmio++) {
+		if (mmio->in_context) {
+			gvt->engine_mmio_list.ctx_mmio_count[mmio->id]++;
+			intel_gvt_mmio_set_sr_in_ctx(gvt, mmio->reg.reg);
+		}
+	}
+}
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h
new file mode 100644
index 0000000000..9540813b88
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eddie Dong <eddie.dong@intel.com>
+ *    Kevin Tian <kevin.tian@intel.com>
+ *
+ * Contributors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *    Changbin Du <changbin.du@intel.com>
+ *    Zhenyu Wang <zhenyuw@linux.intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *
+ */
+
+#ifndef __GVT_RENDER_H__
+#define __GVT_RENDER_H__
+
+#include <linux/types.h>
+
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_engine_types.h"
+#include "gt/intel_lrc_reg.h"
+
+struct i915_request;
+struct intel_context;
+struct intel_engine_cs;
+struct intel_gvt;
+struct intel_vgpu;
+
+struct engine_mmio {
+	enum intel_engine_id id;
+	i915_reg_t reg;
+	u32 mask;
+	bool in_context;
+	u32 value;
+};
+
+void intel_gvt_switch_mmio(struct intel_vgpu *pre,
+			   struct intel_vgpu *next,
+			   const struct intel_engine_cs *engine);
+
+void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt);
+
+bool is_inhibit_context(struct intel_context *ce);
+
+int intel_vgpu_restore_inhibit_context(struct intel_vgpu *vgpu,
+				       struct i915_request *req);
+
+#define IS_RESTORE_INHIBIT(a) \
+	IS_MASKED_BITS_ENABLED(a, CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT)
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c
new file mode 100644
index 0000000000..d2bed46654
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/opregion.c
@@ -0,0 +1,483 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/acpi.h>
+#include "i915_drv.h"
+#include "gvt.h"
+
+/*
+ * Note: Only for GVT-g virtual VBT generation, other usage must
+ * not do like this.
+ */
+#define _INTEL_BIOS_PRIVATE
+#include "display/intel_vbt_defs.h"
+
+#define OPREGION_SIGNATURE "IntelGraphicsMem"
+#define MBOX_VBT      (1<<3)
+
+/* device handle */
+#define DEVICE_TYPE_CRT    0x01
+#define DEVICE_TYPE_EFP1   0x04
+#define DEVICE_TYPE_EFP2   0x40
+#define DEVICE_TYPE_EFP3   0x20
+#define DEVICE_TYPE_EFP4   0x10
+
+struct opregion_header {
+	u8 signature[16];
+	u32 size;
+	u32 opregion_ver;
+	u8 bios_ver[32];
+	u8 vbios_ver[16];
+	u8 driver_ver[16];
+	u32 mboxes;
+	u32 driver_model;
+	u32 pcon;
+	u8 dver[32];
+	u8 rsvd[124];
+} __packed;
+
+struct bdb_data_header {
+	u8 id;
+	u16 size; /* data size */
+} __packed;
+
+/* For supporting windows guest with opregion, here hardcode the emulated
+ * bdb header version as '186', and the corresponding child_device_config
+ * length should be '33' but not '38'.
+ */
+struct efp_child_device_config {
+	u16 handle;
+	u16 device_type;
+	u16 device_class;
+	u8 i2c_speed;
+	u8 dp_onboard_redriver; /* 158 */
+	u8 dp_ondock_redriver; /* 158 */
+	u8 hdmi_level_shifter_value:4; /* 169 */
+	u8 hdmi_max_data_rate:4; /* 204 */
+	u16 dtd_buf_ptr; /* 161 */
+	u8 edidless_efp:1; /* 161 */
+	u8 compression_enable:1; /* 198 */
+	u8 compression_method:1; /* 198 */
+	u8 ganged_edp:1; /* 202 */
+	u8 skip0:4;
+	u8 compression_structure_index:4; /* 198 */
+	u8 skip1:4;
+	u8 slave_port; /*  202 */
+	u8 skip2;
+	u8 dvo_port;
+	u8 i2c_pin; /* for add-in card */
+	u8 slave_addr; /* for add-in card */
+	u8 ddc_pin;
+	u16 edid_ptr;
+	u8 dvo_config;
+	u8 efp_docked_port:1; /* 158 */
+	u8 lane_reversal:1; /* 184 */
+	u8 onboard_lspcon:1; /* 192 */
+	u8 iboost_enable:1; /* 196 */
+	u8 hpd_invert:1; /* BXT 196 */
+	u8 slip3:3;
+	u8 hdmi_compat:1;
+	u8 dp_compat:1;
+	u8 tmds_compat:1;
+	u8 skip4:5;
+	u8 aux_channel;
+	u8 dongle_detect;
+	u8 pipe_cap:2;
+	u8 sdvo_stall:1; /* 158 */
+	u8 hpd_status:2;
+	u8 integrated_encoder:1;
+	u8 skip5:2;
+	u8 dvo_wiring;
+	u8 mipi_bridge_type; /* 171 */
+	u16 device_class_ext;
+	u8 dvo_function;
+} __packed;
+
+struct vbt {
+	/* header->bdb_offset point to bdb_header offset */
+	struct vbt_header header;
+	struct bdb_header bdb_header;
+
+	struct bdb_data_header general_features_header;
+	struct bdb_general_features general_features;
+
+	struct bdb_data_header general_definitions_header;
+	struct bdb_general_definitions general_definitions;
+
+	struct efp_child_device_config child0;
+	struct efp_child_device_config child1;
+	struct efp_child_device_config child2;
+	struct efp_child_device_config child3;
+
+	struct bdb_data_header driver_features_header;
+	struct bdb_driver_features driver_features;
+};
+
+static void virt_vbt_generation(struct vbt *v)
+{
+	int num_child;
+
+	memset(v, 0, sizeof(struct vbt));
+
+	v->header.signature[0] = '$';
+	v->header.signature[1] = 'V';
+	v->header.signature[2] = 'B';
+	v->header.signature[3] = 'T';
+
+	/* there's features depending on version! */
+	v->header.version = 155;
+	v->header.header_size = sizeof(v->header);
+	v->header.vbt_size = sizeof(struct vbt);
+	v->header.bdb_offset = offsetof(struct vbt, bdb_header);
+
+	strcpy(&v->bdb_header.signature[0], "BIOS_DATA_BLOCK");
+	v->bdb_header.version = 186; /* child_dev_size = 33 */
+	v->bdb_header.header_size = sizeof(v->bdb_header);
+
+	v->bdb_header.bdb_size = sizeof(struct vbt) - sizeof(struct vbt_header);
+
+	/* general features */
+	v->general_features_header.id = BDB_GENERAL_FEATURES;
+	v->general_features_header.size = sizeof(struct bdb_general_features);
+	v->general_features.int_crt_support = 0;
+	v->general_features.int_tv_support = 0;
+
+	/* child device */
+	num_child = 4; /* each port has one child */
+	v->general_definitions.child_dev_size =
+		sizeof(struct efp_child_device_config);
+	v->general_definitions_header.id = BDB_GENERAL_DEFINITIONS;
+	/* size will include child devices */
+	v->general_definitions_header.size =
+		sizeof(struct bdb_general_definitions) +
+			num_child * v->general_definitions.child_dev_size;
+
+	/* portA */
+	v->child0.handle = DEVICE_TYPE_EFP1;
+	v->child0.device_type = DEVICE_TYPE_DP;
+	v->child0.dvo_port = DVO_PORT_DPA;
+	v->child0.aux_channel = DP_AUX_A;
+	v->child0.dp_compat = true;
+	v->child0.integrated_encoder = true;
+
+	/* portB */
+	v->child1.handle = DEVICE_TYPE_EFP2;
+	v->child1.device_type = DEVICE_TYPE_DP;
+	v->child1.dvo_port = DVO_PORT_DPB;
+	v->child1.aux_channel = DP_AUX_B;
+	v->child1.dp_compat = true;
+	v->child1.integrated_encoder = true;
+
+	/* portC */
+	v->child2.handle = DEVICE_TYPE_EFP3;
+	v->child2.device_type = DEVICE_TYPE_DP;
+	v->child2.dvo_port = DVO_PORT_DPC;
+	v->child2.aux_channel = DP_AUX_C;
+	v->child2.dp_compat = true;
+	v->child2.integrated_encoder = true;
+
+	/* portD */
+	v->child3.handle = DEVICE_TYPE_EFP4;
+	v->child3.device_type = DEVICE_TYPE_DP;
+	v->child3.dvo_port = DVO_PORT_DPD;
+	v->child3.aux_channel = DP_AUX_D;
+	v->child3.dp_compat = true;
+	v->child3.integrated_encoder = true;
+
+	/* driver features */
+	v->driver_features_header.id = BDB_DRIVER_FEATURES;
+	v->driver_features_header.size = sizeof(struct bdb_driver_features);
+	v->driver_features.lvds_config = BDB_DRIVER_FEATURE_NO_LVDS;
+}
+
+/**
+ * intel_vgpu_init_opregion - initialize the stuff used to emulate opregion
+ * @vgpu: a vGPU
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_init_opregion(struct intel_vgpu *vgpu)
+{
+	u8 *buf;
+	struct opregion_header *header;
+	struct vbt v;
+	const char opregion_signature[16] = OPREGION_SIGNATURE;
+
+	gvt_dbg_core("init vgpu%d opregion\n", vgpu->id);
+	vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL |
+			__GFP_ZERO,
+			get_order(INTEL_GVT_OPREGION_SIZE));
+	if (!vgpu_opregion(vgpu)->va) {
+		gvt_err("fail to get memory for vgpu virt opregion\n");
+		return -ENOMEM;
+	}
+
+	/* emulated opregion with VBT mailbox only */
+	buf = (u8 *)vgpu_opregion(vgpu)->va;
+	header = (struct opregion_header *)buf;
+	memcpy(header->signature, opregion_signature,
+	       sizeof(opregion_signature));
+	header->size = 0x8;
+	header->opregion_ver = 0x02000000;
+	header->mboxes = MBOX_VBT;
+
+	/* for unknown reason, the value in LID field is incorrect
+	 * which block the windows guest, so workaround it by force
+	 * setting it to "OPEN"
+	 */
+	buf[INTEL_GVT_OPREGION_CLID] = 0x3;
+
+	/* emulated vbt from virt vbt generation */
+	virt_vbt_generation(&v);
+	memcpy(buf + INTEL_GVT_OPREGION_VBT_OFFSET, &v, sizeof(struct vbt));
+
+	return 0;
+}
+
+/**
+ * intel_vgpu_opregion_base_write_handler - Opregion base register write handler
+ *
+ * @vgpu: a vGPU
+ * @gpa: guest physical address of opregion
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ */
+int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa)
+{
+
+	int i;
+
+	gvt_dbg_core("emulate opregion from kernel\n");
+
+	for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++)
+		vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i;
+	return 0;
+}
+
+/**
+ * intel_vgpu_clean_opregion - clean the stuff used to emulate opregion
+ * @vgpu: a vGPU
+ *
+ */
+void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu)
+{
+	gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id);
+
+	if (!vgpu_opregion(vgpu)->va)
+		return;
+
+	/* Guest opregion is released by VFIO */
+	free_pages((unsigned long)vgpu_opregion(vgpu)->va,
+		   get_order(INTEL_GVT_OPREGION_SIZE));
+
+	vgpu_opregion(vgpu)->va = NULL;
+
+}
+
+
+#define GVT_OPREGION_FUNC(scic)					\
+	({							\
+	 u32 __ret;						\
+	 __ret = (scic & OPREGION_SCIC_FUNC_MASK) >>		\
+	 OPREGION_SCIC_FUNC_SHIFT;				\
+	 __ret;							\
+	 })
+
+#define GVT_OPREGION_SUBFUNC(scic)				\
+	({							\
+	 u32 __ret;						\
+	 __ret = (scic & OPREGION_SCIC_SUBFUNC_MASK) >>		\
+	 OPREGION_SCIC_SUBFUNC_SHIFT;				\
+	 __ret;							\
+	 })
+
+static const char *opregion_func_name(u32 func)
+{
+	const char *name = NULL;
+
+	switch (func) {
+	case 0 ... 3:
+	case 5:
+	case 7 ... 15:
+		name = "Reserved";
+		break;
+
+	case 4:
+		name = "Get BIOS Data";
+		break;
+
+	case 6:
+		name = "System BIOS Callbacks";
+		break;
+
+	default:
+		name = "Unknown";
+		break;
+	}
+	return name;
+}
+
+static const char *opregion_subfunc_name(u32 subfunc)
+{
+	const char *name = NULL;
+
+	switch (subfunc) {
+	case 0:
+		name = "Supported Calls";
+		break;
+
+	case 1:
+		name = "Requested Callbacks";
+		break;
+
+	case 2 ... 3:
+	case 8 ... 9:
+		name = "Reserved";
+		break;
+
+	case 5:
+		name = "Boot Display";
+		break;
+
+	case 6:
+		name = "TV-Standard/Video-Connector";
+		break;
+
+	case 7:
+		name = "Internal Graphics";
+		break;
+
+	case 10:
+		name = "Spread Spectrum Clocks";
+		break;
+
+	case 11:
+		name = "Get AKSV";
+		break;
+
+	default:
+		name = "Unknown";
+		break;
+	}
+	return name;
+};
+
+static bool querying_capabilities(u32 scic)
+{
+	u32 func, subfunc;
+
+	func = GVT_OPREGION_FUNC(scic);
+	subfunc = GVT_OPREGION_SUBFUNC(scic);
+
+	if ((func == INTEL_GVT_OPREGION_SCIC_F_GETBIOSDATA &&
+		subfunc == INTEL_GVT_OPREGION_SCIC_SF_SUPPRTEDCALLS)
+		|| (func == INTEL_GVT_OPREGION_SCIC_F_GETBIOSDATA &&
+		 subfunc == INTEL_GVT_OPREGION_SCIC_SF_REQEUSTEDCALLBACKS)
+		|| (func == INTEL_GVT_OPREGION_SCIC_F_GETBIOSCALLBACKS &&
+		 subfunc == INTEL_GVT_OPREGION_SCIC_SF_SUPPRTEDCALLS)) {
+		return true;
+	}
+	return false;
+}
+
+/**
+ * intel_vgpu_emulate_opregion_request - emulating OpRegion request
+ * @vgpu: a vGPU
+ * @swsci: SWSCI request
+ *
+ * Returns:
+ * Zero on success, negative error code if failed
+ */
+int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci)
+{
+	u32 scic, parm;
+	u32 func, subfunc;
+	u64 scic_pa = 0, parm_pa = 0;
+	int ret;
+
+	scic_pa = (vgpu_opregion(vgpu)->gfn[0] << PAGE_SHIFT) +
+				INTEL_GVT_OPREGION_SCIC;
+	parm_pa = (vgpu_opregion(vgpu)->gfn[0] << PAGE_SHIFT) +
+				INTEL_GVT_OPREGION_PARM;
+	ret = intel_gvt_read_gpa(vgpu, scic_pa, &scic, sizeof(scic));
+	if (ret) {
+		gvt_vgpu_err("guest opregion read error %d, gpa 0x%llx, len %lu\n",
+			ret, scic_pa, sizeof(scic));
+		return ret;
+	}
+
+	ret = intel_gvt_read_gpa(vgpu, parm_pa, &parm, sizeof(parm));
+	if (ret) {
+		gvt_vgpu_err("guest opregion read error %d, gpa 0x%llx, len %lu\n",
+			ret, scic_pa, sizeof(scic));
+		return ret;
+	}
+
+	if (!(swsci & SWSCI_SCI_SELECT)) {
+		gvt_vgpu_err("requesting SMI service\n");
+		return 0;
+	}
+	/* ignore non 0->1 trasitions */
+	if ((vgpu_cfg_space(vgpu)[INTEL_GVT_PCI_SWSCI]
+				& SWSCI_SCI_TRIGGER) ||
+			!(swsci & SWSCI_SCI_TRIGGER)) {
+		return 0;
+	}
+
+	func = GVT_OPREGION_FUNC(scic);
+	subfunc = GVT_OPREGION_SUBFUNC(scic);
+	if (!querying_capabilities(scic)) {
+		gvt_vgpu_err("requesting runtime service: func \"%s\","
+				" subfunc \"%s\"\n",
+				opregion_func_name(func),
+				opregion_subfunc_name(subfunc));
+		/*
+		 * emulate exit status of function call, '0' means
+		 * "failure, generic, unsupported or unknown cause"
+		 */
+		scic &= ~OPREGION_SCIC_EXIT_MASK;
+		goto out;
+	}
+
+	scic = 0;
+	parm = 0;
+
+out:
+	ret = intel_gvt_write_gpa(vgpu, scic_pa, &scic, sizeof(scic));
+	if (ret) {
+		gvt_vgpu_err("guest opregion write error %d, gpa 0x%llx, len %lu\n",
+			ret, scic_pa, sizeof(scic));
+		return ret;
+	}
+
+	ret = intel_gvt_write_gpa(vgpu, parm_pa, &parm, sizeof(parm));
+	if (ret) {
+		gvt_vgpu_err("guest opregion write error %d, gpa 0x%llx, len %lu\n",
+			ret, scic_pa, sizeof(scic));
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c
new file mode 100644
index 0000000000..60a6543555
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/page_track.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright(c) 2011-2017 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "i915_drv.h"
+#include "gvt.h"
+
+/**
+ * intel_vgpu_find_page_track - find page track rcord of guest page
+ * @vgpu: a vGPU
+ * @gfn: the gfn of guest page
+ *
+ * Returns:
+ * A pointer to struct intel_vgpu_page_track if found, else NULL returned.
+ */
+struct intel_vgpu_page_track *intel_vgpu_find_page_track(
+		struct intel_vgpu *vgpu, unsigned long gfn)
+{
+	return radix_tree_lookup(&vgpu->page_track_tree, gfn);
+}
+
+/**
+ * intel_vgpu_register_page_track - register a guest page to be tacked
+ * @vgpu: a vGPU
+ * @gfn: the gfn of guest page
+ * @handler: page track handler
+ * @priv: tracker private
+ *
+ * Returns:
+ * zero on success, negative error code if failed.
+ */
+int intel_vgpu_register_page_track(struct intel_vgpu *vgpu, unsigned long gfn,
+		gvt_page_track_handler_t handler, void *priv)
+{
+	struct intel_vgpu_page_track *track;
+	int ret;
+
+	track = intel_vgpu_find_page_track(vgpu, gfn);
+	if (track)
+		return -EEXIST;
+
+	track = kzalloc(sizeof(*track), GFP_KERNEL);
+	if (!track)
+		return -ENOMEM;
+
+	track->handler = handler;
+	track->priv_data = priv;
+
+	ret = radix_tree_insert(&vgpu->page_track_tree, gfn, track);
+	if (ret) {
+		kfree(track);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * intel_vgpu_unregister_page_track - unregister the tracked guest page
+ * @vgpu: a vGPU
+ * @gfn: the gfn of guest page
+ *
+ */
+void intel_vgpu_unregister_page_track(struct intel_vgpu *vgpu,
+		unsigned long gfn)
+{
+	struct intel_vgpu_page_track *track;
+
+	track = radix_tree_delete(&vgpu->page_track_tree, gfn);
+	if (track) {
+		if (track->tracked)
+			intel_gvt_page_track_remove(vgpu, gfn);
+		kfree(track);
+	}
+}
+
+/**
+ * intel_vgpu_enable_page_track - set write-protection on guest page
+ * @vgpu: a vGPU
+ * @gfn: the gfn of guest page
+ *
+ * Returns:
+ * zero on success, negative error code if failed.
+ */
+int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn)
+{
+	struct intel_vgpu_page_track *track;
+	int ret;
+
+	track = intel_vgpu_find_page_track(vgpu, gfn);
+	if (!track)
+		return -ENXIO;
+
+	if (track->tracked)
+		return 0;
+
+	ret = intel_gvt_page_track_add(vgpu, gfn);
+	if (ret)
+		return ret;
+	track->tracked = true;
+	return 0;
+}
+
+/**
+ * intel_vgpu_disable_page_track - cancel write-protection on guest page
+ * @vgpu: a vGPU
+ * @gfn: the gfn of guest page
+ *
+ * Returns:
+ * zero on success, negative error code if failed.
+ */
+int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn)
+{
+	struct intel_vgpu_page_track *track;
+	int ret;
+
+	track = intel_vgpu_find_page_track(vgpu, gfn);
+	if (!track)
+		return -ENXIO;
+
+	if (!track->tracked)
+		return 0;
+
+	ret = intel_gvt_page_track_remove(vgpu, gfn);
+	if (ret)
+		return ret;
+	track->tracked = false;
+	return 0;
+}
+
+/**
+ * intel_vgpu_page_track_handler - called when write to write-protected page
+ * @vgpu: a vGPU
+ * @gpa: the gpa of this write
+ * @data: the writed data
+ * @bytes: the length of this write
+ *
+ * Returns:
+ * zero on success, negative error code if failed.
+ */
+int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
+		void *data, unsigned int bytes)
+{
+	struct intel_vgpu_page_track *page_track;
+	int ret = 0;
+
+	page_track = intel_vgpu_find_page_track(vgpu, gpa >> PAGE_SHIFT);
+	if (!page_track)
+		return -ENXIO;
+
+	if (unlikely(vgpu->failsafe)) {
+		/* Remove write protection to prevent furture traps. */
+		intel_gvt_page_track_remove(vgpu, gpa >> PAGE_SHIFT);
+	} else {
+		ret = page_track->handler(page_track, gpa, data, bytes);
+		if (ret)
+			gvt_err("guest page write error, gpa %llx\n", gpa);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/i915/gvt/page_track.h b/drivers/gpu/drm/i915/gvt/page_track.h
new file mode 100644
index 0000000000..f6eb713558
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/page_track.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright(c) 2011-2017 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef _GVT_PAGE_TRACK_H_
+#define _GVT_PAGE_TRACK_H_
+
+#include <linux/types.h>
+
+struct intel_vgpu;
+struct intel_vgpu_page_track;
+
+typedef int (*gvt_page_track_handler_t)(
+			struct intel_vgpu_page_track *page_track,
+			u64 gpa, void *data, int bytes);
+
+/* Track record for a write-protected guest page. */
+struct intel_vgpu_page_track {
+	gvt_page_track_handler_t handler;
+	bool tracked;
+	void *priv_data;
+};
+
+struct intel_vgpu_page_track *intel_vgpu_find_page_track(
+		struct intel_vgpu *vgpu, unsigned long gfn);
+
+int intel_vgpu_register_page_track(struct intel_vgpu *vgpu,
+		unsigned long gfn, gvt_page_track_handler_t handler,
+		void *priv);
+void intel_vgpu_unregister_page_track(struct intel_vgpu *vgpu,
+		unsigned long gfn);
+
+int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn);
+int intel_vgpu_disable_page_track(struct intel_vgpu *vgpu, unsigned long gfn);
+
+int intel_vgpu_page_track_handler(struct intel_vgpu *vgpu, u64 gpa,
+		void *data, unsigned int bytes);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/reg.h b/drivers/gpu/drm/i915/gvt/reg.h
new file mode 100644
index 0000000000..d8216c63c3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/reg.h
@@ -0,0 +1,144 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _GVT_REG_H
+#define _GVT_REG_H
+
+#define INTEL_GVT_PCI_CLASS_VGA_OTHER   0x80
+
+#define INTEL_GVT_PCI_GMCH_CONTROL	0x50
+#define   BDW_GMCH_GMS_SHIFT		8
+#define   BDW_GMCH_GMS_MASK		0xff
+
+#define INTEL_GVT_PCI_SWSCI		0xe8
+#define   SWSCI_SCI_SELECT		(1 << 15)
+#define   SWSCI_SCI_TRIGGER		1
+
+#define INTEL_GVT_PCI_OPREGION		0xfc
+
+#define INTEL_GVT_OPREGION_CLID		0x1AC
+#define INTEL_GVT_OPREGION_SCIC		0x200
+#define   OPREGION_SCIC_FUNC_MASK	0x1E
+#define   OPREGION_SCIC_FUNC_SHIFT	1
+#define   OPREGION_SCIC_SUBFUNC_MASK	0xFF00
+#define   OPREGION_SCIC_SUBFUNC_SHIFT	8
+#define   OPREGION_SCIC_EXIT_MASK	0xE0
+#define INTEL_GVT_OPREGION_SCIC_F_GETBIOSDATA         4
+#define INTEL_GVT_OPREGION_SCIC_F_GETBIOSCALLBACKS    6
+#define INTEL_GVT_OPREGION_SCIC_SF_SUPPRTEDCALLS      0
+#define INTEL_GVT_OPREGION_SCIC_SF_REQEUSTEDCALLBACKS 1
+#define INTEL_GVT_OPREGION_PARM                   0x204
+
+#define INTEL_GVT_OPREGION_PAGES	2
+#define INTEL_GVT_OPREGION_SIZE		(INTEL_GVT_OPREGION_PAGES * PAGE_SIZE)
+#define INTEL_GVT_OPREGION_VBT_OFFSET	0x400
+#define INTEL_GVT_OPREGION_VBT_SIZE	\
+		(INTEL_GVT_OPREGION_SIZE - INTEL_GVT_OPREGION_VBT_OFFSET)
+
+#define VGT_SPRSTRIDE(pipe)	_PIPE(pipe, _SPRA_STRIDE, _PLANE_STRIDE_2_B)
+
+#define _REG_701C0(pipe, plane) (0x701c0 + pipe * 0x1000 + (plane - 1) * 0x100)
+#define _REG_701C4(pipe, plane) (0x701c4 + pipe * 0x1000 + (plane - 1) * 0x100)
+
+#define SKL_FLIP_EVENT(pipe, plane) (PRIMARY_A_FLIP_DONE + (plane) * 3 + (pipe))
+
+#define REG50080_FLIP_TYPE_MASK	0x3
+#define REG50080_FLIP_TYPE_ASYNC	0x1
+
+#define REG_50080(_pipe, _plane) ({ \
+	typeof(_pipe) (p) = (_pipe); \
+	typeof(_plane) (q) = (_plane); \
+	(((p) == PIPE_A) ? (((q) == PLANE_PRIMARY) ? (_MMIO(0x50080)) : \
+		(_MMIO(0x50090))) : \
+	(((p) == PIPE_B) ? (((q) == PLANE_PRIMARY) ? (_MMIO(0x50088)) : \
+		(_MMIO(0x50098))) : \
+	(((p) == PIPE_C) ? (((q) == PLANE_PRIMARY) ? (_MMIO(0x5008C)) : \
+		(_MMIO(0x5009C))) : \
+		(_MMIO(0x50080))))); })
+
+#define REG_50080_TO_PIPE(_reg) ({ \
+	typeof(_reg) (reg) = (_reg); \
+	(((reg) == 0x50080 || (reg) == 0x50090) ? (PIPE_A) : \
+	(((reg) == 0x50088 || (reg) == 0x50098) ? (PIPE_B) : \
+	(((reg) == 0x5008C || (reg) == 0x5009C) ? (PIPE_C) : \
+	(INVALID_PIPE)))); })
+
+#define REG_50080_TO_PLANE(_reg) ({ \
+	typeof(_reg) (reg) = (_reg); \
+	(((reg) == 0x50080 || (reg) == 0x50088 || (reg) == 0x5008C) ? \
+		(PLANE_PRIMARY) : \
+	(((reg) == 0x50090 || (reg) == 0x50098 || (reg) == 0x5009C) ? \
+		(PLANE_SPRITE0) : (I915_MAX_PLANES))); })
+
+#define GFX_MODE_BIT_SET_IN_MASK(val, bit) \
+		((((bit) & 0xffff0000) == 0) && !!((val) & (((bit) << 16))))
+
+#define IS_MASKED_BITS_ENABLED(_val, _b) \
+		(((_val) & _MASKED_BIT_ENABLE(_b)) == _MASKED_BIT_ENABLE(_b))
+#define IS_MASKED_BITS_DISABLED(_val, _b) \
+		((_val) & _MASKED_BIT_DISABLE(_b))
+
+#define FORCEWAKE_RENDER_GEN9_REG 0xa278
+#define FORCEWAKE_ACK_RENDER_GEN9_REG 0x0D84
+#define FORCEWAKE_GT_GEN9_REG 0xa188
+#define FORCEWAKE_ACK_GT_GEN9_REG 0x130044
+#define FORCEWAKE_MEDIA_GEN9_REG 0xa270
+#define FORCEWAKE_ACK_MEDIA_GEN9_REG 0x0D88
+#define FORCEWAKE_ACK_HSW_REG 0x130044
+
+#define RB_HEAD_WRAP_CNT_MAX	((1 << 11) - 1)
+#define RB_HEAD_WRAP_CNT_OFF	21
+#define RB_HEAD_OFF_MASK	((1U << 21) - (1U << 2))
+#define RB_TAIL_OFF_MASK	((1U << 21) - (1U << 3))
+#define RB_TAIL_SIZE_MASK	((1U << 21) - (1U << 12))
+#define _RING_CTL_BUF_SIZE(ctl) (((ctl) & RB_TAIL_SIZE_MASK) + \
+		I915_GTT_PAGE_SIZE)
+
+#define PCH_GPIO_BASE	_MMIO(0xc5010)
+
+#define PCH_GMBUS0	_MMIO(0xc5100)
+#define PCH_GMBUS1	_MMIO(0xc5104)
+#define PCH_GMBUS2	_MMIO(0xc5108)
+#define PCH_GMBUS3	_MMIO(0xc510c)
+#define PCH_GMBUS4	_MMIO(0xc5110)
+#define PCH_GMBUS5	_MMIO(0xc5120)
+
+#define TRVATTL3PTRDW(i)	_MMIO(0x4de0 + (i) * 4)
+#define TRNULLDETCT		_MMIO(0x4de8)
+#define TRINVTILEDETCT		_MMIO(0x4dec)
+#define TRVADR			_MMIO(0x4df0)
+#define TRTTE			_MMIO(0x4df4)
+#define RING_EXCC(base)		_MMIO((base) + 0x28)
+#define RING_GFX_MODE(base)	_MMIO((base) + 0x29c)
+#define VF_GUARDBAND		_MMIO(0x83a4)
+
+#define BCS_TILE_REGISTER_VAL_OFFSET (0x43*4)
+
+/* XXX FIXME i915 has changed PP_XXX definition */
+#define PCH_PP_STATUS  _MMIO(0xc7200)
+#define PCH_PP_CONTROL _MMIO(0xc7204)
+#define PCH_PP_ON_DELAYS _MMIO(0xc7208)
+#define PCH_PP_OFF_DELAYS _MMIO(0xc720c)
+#define PCH_PP_DIVISOR _MMIO(0xc7210)
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
new file mode 100644
index 0000000000..c077fb4674
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -0,0 +1,480 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Anhua Xu
+ *    Kevin Tian <kevin.tian@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "gvt.h"
+
+static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
+{
+	enum intel_engine_id i;
+	struct intel_engine_cs *engine;
+
+	for_each_engine(engine, vgpu->gvt->gt, i) {
+		if (!list_empty(workload_q_head(vgpu, engine)))
+			return true;
+	}
+
+	return false;
+}
+
+/* We give 2 seconds higher prio for vGPU during start */
+#define GVT_SCHED_VGPU_PRI_TIME  2
+
+struct vgpu_sched_data {
+	struct list_head lru_list;
+	struct intel_vgpu *vgpu;
+	bool active;
+	bool pri_sched;
+	ktime_t pri_time;
+	ktime_t sched_in_time;
+	ktime_t sched_time;
+	ktime_t left_ts;
+	ktime_t allocated_ts;
+
+	struct vgpu_sched_ctl sched_ctl;
+};
+
+struct gvt_sched_data {
+	struct intel_gvt *gvt;
+	struct hrtimer timer;
+	unsigned long period;
+	struct list_head lru_runq_head;
+	ktime_t expire_time;
+};
+
+static void vgpu_update_timeslice(struct intel_vgpu *vgpu, ktime_t cur_time)
+{
+	ktime_t delta_ts;
+	struct vgpu_sched_data *vgpu_data;
+
+	if (!vgpu || vgpu == vgpu->gvt->idle_vgpu)
+		return;
+
+	vgpu_data = vgpu->sched_data;
+	delta_ts = ktime_sub(cur_time, vgpu_data->sched_in_time);
+	vgpu_data->sched_time = ktime_add(vgpu_data->sched_time, delta_ts);
+	vgpu_data->left_ts = ktime_sub(vgpu_data->left_ts, delta_ts);
+	vgpu_data->sched_in_time = cur_time;
+}
+
+#define GVT_TS_BALANCE_PERIOD_MS 100
+#define GVT_TS_BALANCE_STAGE_NUM 10
+
+static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
+{
+	struct vgpu_sched_data *vgpu_data;
+	struct list_head *pos;
+	static u64 stage_check;
+	int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;
+
+	/* The timeslice accumulation reset at stage 0, which is
+	 * allocated again without adding previous debt.
+	 */
+	if (stage == 0) {
+		int total_weight = 0;
+		ktime_t fair_timeslice;
+
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+			total_weight += vgpu_data->sched_ctl.weight;
+		}
+
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+			fair_timeslice = ktime_divns(ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS),
+						     total_weight) * vgpu_data->sched_ctl.weight;
+
+			vgpu_data->allocated_ts = fair_timeslice;
+			vgpu_data->left_ts = vgpu_data->allocated_ts;
+		}
+	} else {
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+
+			/* timeslice for next 100ms should add the left/debt
+			 * slice of previous stages.
+			 */
+			vgpu_data->left_ts += vgpu_data->allocated_ts;
+		}
+	}
+}
+
+static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
+{
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	enum intel_engine_id i;
+	struct intel_engine_cs *engine;
+	struct vgpu_sched_data *vgpu_data;
+	ktime_t cur_time;
+
+	/* no need to schedule if next_vgpu is the same with current_vgpu,
+	 * let scheduler chose next_vgpu again by setting it to NULL.
+	 */
+	if (scheduler->next_vgpu == scheduler->current_vgpu) {
+		scheduler->next_vgpu = NULL;
+		return;
+	}
+
+	/*
+	 * after the flag is set, workload dispatch thread will
+	 * stop dispatching workload for current vgpu
+	 */
+	scheduler->need_reschedule = true;
+
+	/* still have uncompleted workload? */
+	for_each_engine(engine, gvt->gt, i) {
+		if (scheduler->current_workload[engine->id])
+			return;
+	}
+
+	cur_time = ktime_get();
+	vgpu_update_timeslice(scheduler->current_vgpu, cur_time);
+	vgpu_data = scheduler->next_vgpu->sched_data;
+	vgpu_data->sched_in_time = cur_time;
+
+	/* switch current vgpu */
+	scheduler->current_vgpu = scheduler->next_vgpu;
+	scheduler->next_vgpu = NULL;
+
+	scheduler->need_reschedule = false;
+
+	/* wake up workload dispatch thread */
+	for_each_engine(engine, gvt->gt, i)
+		wake_up(&scheduler->waitq[engine->id]);
+}
+
+static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
+{
+	struct vgpu_sched_data *vgpu_data;
+	struct intel_vgpu *vgpu = NULL;
+	struct list_head *head = &sched_data->lru_runq_head;
+	struct list_head *pos;
+
+	/* search a vgpu with pending workload */
+	list_for_each(pos, head) {
+
+		vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+		if (!vgpu_has_pending_workload(vgpu_data->vgpu))
+			continue;
+
+		if (vgpu_data->pri_sched) {
+			if (ktime_before(ktime_get(), vgpu_data->pri_time)) {
+				vgpu = vgpu_data->vgpu;
+				break;
+			} else
+				vgpu_data->pri_sched = false;
+		}
+
+		/* Return the vGPU only if it has time slice left */
+		if (vgpu_data->left_ts > 0) {
+			vgpu = vgpu_data->vgpu;
+			break;
+		}
+	}
+
+	return vgpu;
+}
+
+/* in nanosecond */
+#define GVT_DEFAULT_TIME_SLICE 1000000
+
+static void tbs_sched_func(struct gvt_sched_data *sched_data)
+{
+	struct intel_gvt *gvt = sched_data->gvt;
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	struct vgpu_sched_data *vgpu_data;
+	struct intel_vgpu *vgpu = NULL;
+
+	/* no active vgpu or has already had a target */
+	if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
+		goto out;
+
+	vgpu = find_busy_vgpu(sched_data);
+	if (vgpu) {
+		scheduler->next_vgpu = vgpu;
+		vgpu_data = vgpu->sched_data;
+		if (!vgpu_data->pri_sched) {
+			/* Move the last used vGPU to the tail of lru_list */
+			list_del_init(&vgpu_data->lru_list);
+			list_add_tail(&vgpu_data->lru_list,
+				      &sched_data->lru_runq_head);
+		}
+	} else {
+		scheduler->next_vgpu = gvt->idle_vgpu;
+	}
+out:
+	if (scheduler->next_vgpu)
+		try_to_schedule_next_vgpu(gvt);
+}
+
+void intel_gvt_schedule(struct intel_gvt *gvt)
+{
+	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
+	ktime_t cur_time;
+
+	mutex_lock(&gvt->sched_lock);
+	cur_time = ktime_get();
+
+	if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
+				(void *)&gvt->service_request)) {
+		if (cur_time >= sched_data->expire_time) {
+			gvt_balance_timeslice(sched_data);
+			sched_data->expire_time = ktime_add_ms(
+				cur_time, GVT_TS_BALANCE_PERIOD_MS);
+		}
+	}
+	clear_bit(INTEL_GVT_REQUEST_EVENT_SCHED, (void *)&gvt->service_request);
+
+	vgpu_update_timeslice(gvt->scheduler.current_vgpu, cur_time);
+	tbs_sched_func(sched_data);
+
+	mutex_unlock(&gvt->sched_lock);
+}
+
+static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
+{
+	struct gvt_sched_data *data;
+
+	data = container_of(timer_data, struct gvt_sched_data, timer);
+
+	intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);
+
+	hrtimer_add_expires_ns(&data->timer, data->period);
+
+	return HRTIMER_RESTART;
+}
+
+static int tbs_sched_init(struct intel_gvt *gvt)
+{
+	struct intel_gvt_workload_scheduler *scheduler =
+		&gvt->scheduler;
+
+	struct gvt_sched_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&data->lru_runq_head);
+	hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	data->timer.function = tbs_timer_fn;
+	data->period = GVT_DEFAULT_TIME_SLICE;
+	data->gvt = gvt;
+
+	scheduler->sched_data = data;
+
+	return 0;
+}
+
+static void tbs_sched_clean(struct intel_gvt *gvt)
+{
+	struct intel_gvt_workload_scheduler *scheduler =
+		&gvt->scheduler;
+	struct gvt_sched_data *data = scheduler->sched_data;
+
+	hrtimer_cancel(&data->timer);
+
+	kfree(data);
+	scheduler->sched_data = NULL;
+}
+
+static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
+{
+	struct vgpu_sched_data *data;
+
+	data = kzalloc(sizeof(*data), GFP_KERNEL);
+	if (!data)
+		return -ENOMEM;
+
+	data->sched_ctl.weight = vgpu->sched_ctl.weight;
+	data->vgpu = vgpu;
+	INIT_LIST_HEAD(&data->lru_list);
+
+	vgpu->sched_data = data;
+
+	return 0;
+}
+
+static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
+
+	kfree(vgpu->sched_data);
+	vgpu->sched_data = NULL;
+
+	/* this vgpu id has been removed */
+	if (idr_is_empty(&gvt->vgpu_idr))
+		hrtimer_cancel(&sched_data->timer);
+}
+
+static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
+{
+	struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
+	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
+	ktime_t now;
+
+	if (!list_empty(&vgpu_data->lru_list))
+		return;
+
+	now = ktime_get();
+	vgpu_data->pri_time = ktime_add(now,
+					ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0));
+	vgpu_data->pri_sched = true;
+
+	list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head);
+
+	if (!hrtimer_active(&sched_data->timer))
+		hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
+			sched_data->period), HRTIMER_MODE_ABS);
+	vgpu_data->active = true;
+}
+
+static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
+{
+	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
+
+	list_del_init(&vgpu_data->lru_list);
+	vgpu_data->active = false;
+}
+
+static const struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
+	.init = tbs_sched_init,
+	.clean = tbs_sched_clean,
+	.init_vgpu = tbs_sched_init_vgpu,
+	.clean_vgpu = tbs_sched_clean_vgpu,
+	.start_schedule = tbs_sched_start_schedule,
+	.stop_schedule = tbs_sched_stop_schedule,
+};
+
+int intel_gvt_init_sched_policy(struct intel_gvt *gvt)
+{
+	int ret;
+
+	mutex_lock(&gvt->sched_lock);
+	gvt->scheduler.sched_ops = &tbs_schedule_ops;
+	ret = gvt->scheduler.sched_ops->init(gvt);
+	mutex_unlock(&gvt->sched_lock);
+
+	return ret;
+}
+
+void intel_gvt_clean_sched_policy(struct intel_gvt *gvt)
+{
+	mutex_lock(&gvt->sched_lock);
+	gvt->scheduler.sched_ops->clean(gvt);
+	mutex_unlock(&gvt->sched_lock);
+}
+
+/* for per-vgpu scheduler policy, there are 2 per-vgpu data:
+ * sched_data, and sched_ctl. We see these 2 data as part of
+ * the global scheduler which are proteced by gvt->sched_lock.
+ * Caller should make their decision if the vgpu_lock should
+ * be hold outside.
+ */
+
+int intel_vgpu_init_sched_policy(struct intel_vgpu *vgpu)
+{
+	int ret;
+
+	mutex_lock(&vgpu->gvt->sched_lock);
+	ret = vgpu->gvt->scheduler.sched_ops->init_vgpu(vgpu);
+	mutex_unlock(&vgpu->gvt->sched_lock);
+
+	return ret;
+}
+
+void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu)
+{
+	mutex_lock(&vgpu->gvt->sched_lock);
+	vgpu->gvt->scheduler.sched_ops->clean_vgpu(vgpu);
+	mutex_unlock(&vgpu->gvt->sched_lock);
+}
+
+void intel_vgpu_start_schedule(struct intel_vgpu *vgpu)
+{
+	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
+
+	mutex_lock(&vgpu->gvt->sched_lock);
+	if (!vgpu_data->active) {
+		gvt_dbg_core("vgpu%d: start schedule\n", vgpu->id);
+		vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu);
+	}
+	mutex_unlock(&vgpu->gvt->sched_lock);
+}
+
+void intel_gvt_kick_schedule(struct intel_gvt *gvt)
+{
+	mutex_lock(&gvt->sched_lock);
+	intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
+	mutex_unlock(&gvt->sched_lock);
+}
+
+void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt_workload_scheduler *scheduler =
+		&vgpu->gvt->scheduler;
+	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
+	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	if (!vgpu_data->active)
+		return;
+
+	gvt_dbg_core("vgpu%d: stop schedule\n", vgpu->id);
+
+	mutex_lock(&vgpu->gvt->sched_lock);
+	scheduler->sched_ops->stop_schedule(vgpu);
+
+	if (scheduler->next_vgpu == vgpu)
+		scheduler->next_vgpu = NULL;
+
+	if (scheduler->current_vgpu == vgpu) {
+		/* stop workload dispatching */
+		scheduler->need_reschedule = true;
+		scheduler->current_vgpu = NULL;
+	}
+
+	intel_runtime_pm_get(&dev_priv->runtime_pm);
+	spin_lock_bh(&scheduler->mmio_context_lock);
+	for_each_engine(engine, vgpu->gvt->gt, id) {
+		if (scheduler->engine_owner[engine->id] == vgpu) {
+			intel_gvt_switch_mmio(vgpu, NULL, engine);
+			scheduler->engine_owner[engine->id] = NULL;
+		}
+	}
+	spin_unlock_bh(&scheduler->mmio_context_lock);
+	intel_runtime_pm_put_unchecked(&dev_priv->runtime_pm);
+	mutex_unlock(&vgpu->gvt->sched_lock);
+}
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h
new file mode 100644
index 0000000000..3dacdad5f5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Anhua Xu
+ *    Kevin Tian <kevin.tian@intel.com>
+ *
+ * Contributors:
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#ifndef __GVT_SCHED_POLICY__
+#define __GVT_SCHED_POLICY__
+
+struct intel_gvt;
+struct intel_vgpu;
+
+struct intel_gvt_sched_policy_ops {
+	int (*init)(struct intel_gvt *gvt);
+	void (*clean)(struct intel_gvt *gvt);
+	int (*init_vgpu)(struct intel_vgpu *vgpu);
+	void (*clean_vgpu)(struct intel_vgpu *vgpu);
+	void (*start_schedule)(struct intel_vgpu *vgpu);
+	void (*stop_schedule)(struct intel_vgpu *vgpu);
+};
+
+void intel_gvt_schedule(struct intel_gvt *gvt);
+
+int intel_gvt_init_sched_policy(struct intel_gvt *gvt);
+
+void intel_gvt_clean_sched_policy(struct intel_gvt *gvt);
+
+int intel_vgpu_init_sched_policy(struct intel_vgpu *vgpu);
+
+void intel_vgpu_clean_sched_policy(struct intel_vgpu *vgpu);
+
+void intel_vgpu_start_schedule(struct intel_vgpu *vgpu);
+
+void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu);
+
+void intel_gvt_kick_schedule(struct intel_gvt *gvt);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
new file mode 100644
index 0000000000..a5c8005ec4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -0,0 +1,1786 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ * Contributors:
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Chanbin Du <changbin.du@intel.com>
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Zhenyu Wang <zhenyuw@linux.intel.com>
+ *
+ */
+
+#include <linux/kthread.h>
+
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+#include "gt/intel_execlists_submission.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_lrc.h"
+#include "gt/intel_ring.h"
+
+#include "i915_drv.h"
+#include "i915_gem_gtt.h"
+#include "i915_perf_oa_regs.h"
+#include "gvt.h"
+
+#define RING_CTX_OFF(x) \
+	offsetof(struct execlist_ring_context, x)
+
+static void set_context_pdp_root_pointer(
+		struct execlist_ring_context *ring_context,
+		u32 pdp[8])
+{
+	int i;
+
+	for (i = 0; i < 8; i++)
+		ring_context->pdps[i].val = pdp[7 - i];
+}
+
+static void update_shadow_pdps(struct intel_vgpu_workload *workload)
+{
+	struct execlist_ring_context *shadow_ring_context;
+	struct intel_context *ctx = workload->req->context;
+
+	if (WARN_ON(!workload->shadow_mm))
+		return;
+
+	if (WARN_ON(!atomic_read(&workload->shadow_mm->pincount)))
+		return;
+
+	shadow_ring_context = (struct execlist_ring_context *)ctx->lrc_reg_state;
+	set_context_pdp_root_pointer(shadow_ring_context,
+			(void *)workload->shadow_mm->ppgtt_mm.shadow_pdps);
+}
+
+/*
+ * when populating shadow ctx from guest, we should not overrride oa related
+ * registers, so that they will not be overlapped by guest oa configs. Thus
+ * made it possible to capture oa data from host for both host and guests.
+ */
+static void sr_oa_regs(struct intel_vgpu_workload *workload,
+		u32 *reg_state, bool save)
+{
+	struct drm_i915_private *dev_priv = workload->vgpu->gvt->gt->i915;
+	u32 ctx_oactxctrl = dev_priv->perf.ctx_oactxctrl_offset;
+	u32 ctx_flexeu0 = dev_priv->perf.ctx_flexeu0_offset;
+	int i = 0;
+	u32 flex_mmio[] = {
+		i915_mmio_reg_offset(EU_PERF_CNTL0),
+		i915_mmio_reg_offset(EU_PERF_CNTL1),
+		i915_mmio_reg_offset(EU_PERF_CNTL2),
+		i915_mmio_reg_offset(EU_PERF_CNTL3),
+		i915_mmio_reg_offset(EU_PERF_CNTL4),
+		i915_mmio_reg_offset(EU_PERF_CNTL5),
+		i915_mmio_reg_offset(EU_PERF_CNTL6),
+	};
+
+	if (workload->engine->id != RCS0)
+		return;
+
+	if (save) {
+		workload->oactxctrl = reg_state[ctx_oactxctrl + 1];
+
+		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+			u32 state_offset = ctx_flexeu0 + i * 2;
+
+			workload->flex_mmio[i] = reg_state[state_offset + 1];
+		}
+	} else {
+		reg_state[ctx_oactxctrl] =
+			i915_mmio_reg_offset(GEN8_OACTXCONTROL);
+		reg_state[ctx_oactxctrl + 1] = workload->oactxctrl;
+
+		for (i = 0; i < ARRAY_SIZE(workload->flex_mmio); i++) {
+			u32 state_offset = ctx_flexeu0 + i * 2;
+			u32 mmio = flex_mmio[i];
+
+			reg_state[state_offset] = mmio;
+			reg_state[state_offset + 1] = workload->flex_mmio[i];
+		}
+	}
+}
+
+static int populate_shadow_context(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_context *ctx = workload->req->context;
+	struct execlist_ring_context *shadow_ring_context;
+	void *dst;
+	void *context_base;
+	unsigned long context_gpa, context_page_num;
+	unsigned long gpa_base; /* first gpa of consecutive GPAs */
+	unsigned long gpa_size; /* size of consecutive GPAs */
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	int i;
+	bool skip = false;
+	int ring_id = workload->engine->id;
+	int ret;
+
+	GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
+	context_base = (void *) ctx->lrc_reg_state -
+				(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
+
+	shadow_ring_context = (void *) ctx->lrc_reg_state;
+
+	sr_oa_regs(workload, (u32 *)shadow_ring_context, true);
+#define COPY_REG(name) \
+	intel_gvt_read_gpa(vgpu, workload->ring_context_gpa \
+		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
+#define COPY_REG_MASKED(name) {\
+		intel_gvt_read_gpa(vgpu, workload->ring_context_gpa \
+					      + RING_CTX_OFF(name.val),\
+					      &shadow_ring_context->name.val, 4);\
+		shadow_ring_context->name.val |= 0xffff << 16;\
+	}
+
+	COPY_REG_MASKED(ctx_ctrl);
+	COPY_REG(ctx_timestamp);
+
+	if (workload->engine->id == RCS0) {
+		COPY_REG(bb_per_ctx_ptr);
+		COPY_REG(rcs_indirect_ctx);
+		COPY_REG(rcs_indirect_ctx_offset);
+	} else if (workload->engine->id == BCS0)
+		intel_gvt_read_gpa(vgpu,
+				workload->ring_context_gpa +
+				BCS_TILE_REGISTER_VAL_OFFSET,
+				(void *)shadow_ring_context +
+				BCS_TILE_REGISTER_VAL_OFFSET, 4);
+#undef COPY_REG
+#undef COPY_REG_MASKED
+
+	/* don't copy Ring Context (the first 0x50 dwords),
+	 * only copy the Engine Context part from guest
+	 */
+	intel_gvt_read_gpa(vgpu,
+			workload->ring_context_gpa +
+			RING_CTX_SIZE,
+			(void *)shadow_ring_context +
+			RING_CTX_SIZE,
+			I915_GTT_PAGE_SIZE - RING_CTX_SIZE);
+
+	sr_oa_regs(workload, (u32 *)shadow_ring_context, false);
+
+	gvt_dbg_sched("ring %s workload lrca %x, ctx_id %x, ctx gpa %llx",
+			workload->engine->name, workload->ctx_desc.lrca,
+			workload->ctx_desc.context_id,
+			workload->ring_context_gpa);
+
+	/* only need to ensure this context is not pinned/unpinned during the
+	 * period from last submission to this this submission.
+	 * Upon reaching this function, the currently submitted context is not
+	 * supposed to get unpinned. If a misbehaving guest driver ever does
+	 * this, it would corrupt itself.
+	 */
+	if (s->last_ctx[ring_id].valid &&
+			(s->last_ctx[ring_id].lrca ==
+				workload->ctx_desc.lrca) &&
+			(s->last_ctx[ring_id].ring_context_gpa ==
+				workload->ring_context_gpa))
+		skip = true;
+
+	s->last_ctx[ring_id].lrca = workload->ctx_desc.lrca;
+	s->last_ctx[ring_id].ring_context_gpa = workload->ring_context_gpa;
+
+	if (IS_RESTORE_INHIBIT(shadow_ring_context->ctx_ctrl.val) || skip)
+		return 0;
+
+	s->last_ctx[ring_id].valid = false;
+	context_page_num = workload->engine->context_size;
+	context_page_num = context_page_num >> PAGE_SHIFT;
+
+	if (IS_BROADWELL(gvt->gt->i915) && workload->engine->id == RCS0)
+		context_page_num = 19;
+
+	/* find consecutive GPAs from gma until the first inconsecutive GPA.
+	 * read from the continuous GPAs into dst virtual address
+	 */
+	gpa_size = 0;
+	for (i = 2; i < context_page_num; i++) {
+		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
+				(u32)((workload->ctx_desc.lrca + i) <<
+				I915_GTT_PAGE_SHIFT));
+		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
+			gvt_vgpu_err("Invalid guest context descriptor\n");
+			return -EFAULT;
+		}
+
+		if (gpa_size == 0) {
+			gpa_base = context_gpa;
+			dst = context_base + (i << I915_GTT_PAGE_SHIFT);
+		} else if (context_gpa != gpa_base + gpa_size)
+			goto read;
+
+		gpa_size += I915_GTT_PAGE_SIZE;
+
+		if (i == context_page_num - 1)
+			goto read;
+
+		continue;
+
+read:
+		intel_gvt_read_gpa(vgpu, gpa_base, dst, gpa_size);
+		gpa_base = context_gpa;
+		gpa_size = I915_GTT_PAGE_SIZE;
+		dst = context_base + (i << I915_GTT_PAGE_SHIFT);
+	}
+	ret = intel_gvt_scan_engine_context(workload);
+	if (ret) {
+		gvt_vgpu_err("invalid cmd found in guest context pages\n");
+		return ret;
+	}
+	s->last_ctx[ring_id].valid = true;
+	return 0;
+}
+
+static inline bool is_gvt_request(struct i915_request *rq)
+{
+	return intel_context_force_single_submission(rq->context);
+}
+
+static void save_ring_hw_state(struct intel_vgpu *vgpu,
+			       const struct intel_engine_cs *engine)
+{
+	struct intel_uncore *uncore = engine->uncore;
+	i915_reg_t reg;
+
+	reg = RING_INSTDONE(engine->mmio_base);
+	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) =
+		intel_uncore_read(uncore, reg);
+
+	reg = RING_ACTHD(engine->mmio_base);
+	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) =
+		intel_uncore_read(uncore, reg);
+
+	reg = RING_ACTHD_UDW(engine->mmio_base);
+	vgpu_vreg(vgpu, i915_mmio_reg_offset(reg)) =
+		intel_uncore_read(uncore, reg);
+}
+
+static int shadow_context_status_change(struct notifier_block *nb,
+		unsigned long action, void *data)
+{
+	struct i915_request *rq = data;
+	struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
+				shadow_ctx_notifier_block[rq->engine->id]);
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	enum intel_engine_id ring_id = rq->engine->id;
+	struct intel_vgpu_workload *workload;
+	unsigned long flags;
+
+	if (!is_gvt_request(rq)) {
+		spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
+		if (action == INTEL_CONTEXT_SCHEDULE_IN &&
+		    scheduler->engine_owner[ring_id]) {
+			/* Switch ring from vGPU to host. */
+			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
+					      NULL, rq->engine);
+			scheduler->engine_owner[ring_id] = NULL;
+		}
+		spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
+
+		return NOTIFY_OK;
+	}
+
+	workload = scheduler->current_workload[ring_id];
+	if (unlikely(!workload))
+		return NOTIFY_OK;
+
+	switch (action) {
+	case INTEL_CONTEXT_SCHEDULE_IN:
+		spin_lock_irqsave(&scheduler->mmio_context_lock, flags);
+		if (workload->vgpu != scheduler->engine_owner[ring_id]) {
+			/* Switch ring from host to vGPU or vGPU to vGPU. */
+			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
+					      workload->vgpu, rq->engine);
+			scheduler->engine_owner[ring_id] = workload->vgpu;
+		} else
+			gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
+				      ring_id, workload->vgpu->id);
+		spin_unlock_irqrestore(&scheduler->mmio_context_lock, flags);
+		atomic_set(&workload->shadow_ctx_active, 1);
+		break;
+	case INTEL_CONTEXT_SCHEDULE_OUT:
+		save_ring_hw_state(workload->vgpu, rq->engine);
+		atomic_set(&workload->shadow_ctx_active, 0);
+		break;
+	case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
+		save_ring_hw_state(workload->vgpu, rq->engine);
+		break;
+	default:
+		WARN_ON(1);
+		return NOTIFY_OK;
+	}
+	wake_up(&workload->shadow_ctx_status_wq);
+	return NOTIFY_OK;
+}
+
+static void
+shadow_context_descriptor_update(struct intel_context *ce,
+				 struct intel_vgpu_workload *workload)
+{
+	u64 desc = ce->lrc.desc;
+
+	/*
+	 * Update bits 0-11 of the context descriptor which includes flags
+	 * like GEN8_CTX_* cached in desc_template
+	 */
+	desc &= ~(0x3ull << GEN8_CTX_ADDRESSING_MODE_SHIFT);
+	desc |= (u64)workload->ctx_desc.addressing_mode <<
+		GEN8_CTX_ADDRESSING_MODE_SHIFT;
+
+	ce->lrc.desc = desc;
+}
+
+static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct i915_request *req = workload->req;
+	void *shadow_ring_buffer_va;
+	u32 *cs;
+	int err;
+
+	if (GRAPHICS_VER(req->engine->i915) == 9 && is_inhibit_context(req->context))
+		intel_vgpu_restore_inhibit_context(vgpu, req);
+
+	/*
+	 * To track whether a request has started on HW, we can emit a
+	 * breadcrumb at the beginning of the request and check its
+	 * timeline's HWSP to see if the breadcrumb has advanced past the
+	 * start of this request. Actually, the request must have the
+	 * init_breadcrumb if its timeline set has_init_bread_crumb, or the
+	 * scheduler might get a wrong state of it during reset. Since the
+	 * requests from gvt always set the has_init_breadcrumb flag, here
+	 * need to do the emit_init_breadcrumb for all the requests.
+	 */
+	if (req->engine->emit_init_breadcrumb) {
+		err = req->engine->emit_init_breadcrumb(req);
+		if (err) {
+			gvt_vgpu_err("fail to emit init breadcrumb\n");
+			return err;
+		}
+	}
+
+	/* allocate shadow ring buffer */
+	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
+	if (IS_ERR(cs)) {
+		gvt_vgpu_err("fail to alloc size =%ld shadow  ring buffer\n",
+			workload->rb_len);
+		return PTR_ERR(cs);
+	}
+
+	shadow_ring_buffer_va = workload->shadow_ring_buffer_va;
+
+	/* get shadow ring buffer va */
+	workload->shadow_ring_buffer_va = cs;
+
+	memcpy(cs, shadow_ring_buffer_va,
+			workload->rb_len);
+
+	cs += workload->rb_len / sizeof(u32);
+	intel_ring_advance(workload->req, cs);
+
+	return 0;
+}
+
+static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+{
+	if (!wa_ctx->indirect_ctx.obj)
+		return;
+
+	i915_gem_object_lock(wa_ctx->indirect_ctx.obj, NULL);
+	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
+	i915_gem_object_unlock(wa_ctx->indirect_ctx.obj);
+	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
+
+	wa_ctx->indirect_ctx.obj = NULL;
+	wa_ctx->indirect_ctx.shadow_va = NULL;
+}
+
+static void set_dma_address(struct i915_page_directory *pd, dma_addr_t addr)
+{
+	struct scatterlist *sg = pd->pt.base->mm.pages->sgl;
+
+	/* This is not a good idea */
+	sg->dma_address = addr;
+}
+
+static void set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
+					  struct intel_context *ce)
+{
+	struct intel_vgpu_mm *mm = workload->shadow_mm;
+	struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(ce->vm);
+	int i = 0;
+
+	if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+		set_dma_address(ppgtt->pd, mm->ppgtt_mm.shadow_pdps[0]);
+	} else {
+		for (i = 0; i < GVT_RING_CTX_NR_PDPS; i++) {
+			struct i915_page_directory * const pd =
+				i915_pd_entry(ppgtt->pd, i);
+			/* skip now as current i915 ppgtt alloc won't allocate
+			   top level pdp for non 4-level table, won't impact
+			   shadow ppgtt. */
+			if (!pd)
+				break;
+
+			set_dma_address(pd, mm->ppgtt_mm.shadow_pdps[i]);
+		}
+	}
+}
+
+static int
+intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct i915_request *rq;
+
+	if (workload->req)
+		return 0;
+
+	rq = i915_request_create(s->shadow[workload->engine->id]);
+	if (IS_ERR(rq)) {
+		gvt_vgpu_err("fail to allocate gem request\n");
+		return PTR_ERR(rq);
+	}
+
+	workload->req = i915_request_get(rq);
+	return 0;
+}
+
+/**
+ * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
+ * shadow it as well, include ringbuffer,wa_ctx and ctx.
+ * @workload: an abstract entity for each execlist submission.
+ *
+ * This function is called before the workload submitting to i915, to make
+ * sure the content of the workload is valid.
+ */
+int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	int ret;
+
+	lockdep_assert_held(&vgpu->vgpu_lock);
+
+	if (workload->shadow)
+		return 0;
+
+	if (!test_and_set_bit(workload->engine->id, s->shadow_ctx_desc_updated))
+		shadow_context_descriptor_update(s->shadow[workload->engine->id],
+						 workload);
+
+	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
+	if (ret)
+		return ret;
+
+	if (workload->engine->id == RCS0 &&
+	    workload->wa_ctx.indirect_ctx.size) {
+		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
+		if (ret)
+			goto err_shadow;
+	}
+
+	workload->shadow = true;
+	return 0;
+
+err_shadow:
+	release_shadow_wa_ctx(&workload->wa_ctx);
+	return ret;
+}
+
+static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload);
+
+static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
+{
+	struct intel_gvt *gvt = workload->vgpu->gvt;
+	const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd;
+	struct intel_vgpu_shadow_bb *bb;
+	struct i915_gem_ww_ctx ww;
+	int ret;
+
+	list_for_each_entry(bb, &workload->shadow_bb, list) {
+		/* For privilge batch buffer and not wa_ctx, the bb_start_cmd_va
+		 * is only updated into ring_scan_buffer, not real ring address
+		 * allocated in later copy_workload_to_ring_buffer. pls be noted
+		 * shadow_ring_buffer_va is now pointed to real ring buffer va
+		 * in copy_workload_to_ring_buffer.
+		 */
+
+		if (bb->bb_offset)
+			bb->bb_start_cmd_va = workload->shadow_ring_buffer_va
+				+ bb->bb_offset;
+
+		/*
+		 * For non-priv bb, scan&shadow is only for
+		 * debugging purpose, so the content of shadow bb
+		 * is the same as original bb. Therefore,
+		 * here, rather than switch to shadow bb's gma
+		 * address, we directly use original batch buffer's
+		 * gma address, and send original bb to hardware
+		 * directly
+		 */
+		if (!bb->ppgtt) {
+			i915_gem_ww_ctx_init(&ww, false);
+retry:
+			i915_gem_object_lock(bb->obj, &ww);
+
+			bb->vma = i915_gem_object_ggtt_pin_ww(bb->obj, &ww,
+							      NULL, 0, 0, 0);
+			if (IS_ERR(bb->vma)) {
+				ret = PTR_ERR(bb->vma);
+				if (ret == -EDEADLK) {
+					ret = i915_gem_ww_ctx_backoff(&ww);
+					if (!ret)
+						goto retry;
+				}
+				goto err;
+			}
+
+			/* relocate shadow batch buffer */
+			bb->bb_start_cmd_va[1] = i915_ggtt_offset(bb->vma);
+			if (gmadr_bytes == 8)
+				bb->bb_start_cmd_va[2] = 0;
+
+			ret = i915_vma_move_to_active(bb->vma, workload->req,
+						      __EXEC_OBJECT_NO_REQUEST_AWAIT);
+			if (ret)
+				goto err;
+
+			/* No one is going to touch shadow bb from now on. */
+			i915_gem_object_flush_map(bb->obj);
+			i915_gem_ww_ctx_fini(&ww);
+		}
+	}
+	return 0;
+err:
+	i915_gem_ww_ctx_fini(&ww);
+	release_shadow_batch_buffer(workload);
+	return ret;
+}
+
+static void update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+{
+	struct intel_vgpu_workload *workload =
+		container_of(wa_ctx, struct intel_vgpu_workload, wa_ctx);
+	struct i915_request *rq = workload->req;
+	struct execlist_ring_context *shadow_ring_context =
+		(struct execlist_ring_context *)rq->context->lrc_reg_state;
+
+	shadow_ring_context->bb_per_ctx_ptr.val =
+		(shadow_ring_context->bb_per_ctx_ptr.val &
+		(~PER_CTX_ADDR_MASK)) | wa_ctx->per_ctx.shadow_gma;
+	shadow_ring_context->rcs_indirect_ctx.val =
+		(shadow_ring_context->rcs_indirect_ctx.val &
+		(~INDIRECT_CTX_ADDR_MASK)) | wa_ctx->indirect_ctx.shadow_gma;
+}
+
+static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
+{
+	struct i915_vma *vma;
+	unsigned char *per_ctx_va =
+		(unsigned char *)wa_ctx->indirect_ctx.shadow_va +
+		wa_ctx->indirect_ctx.size;
+	struct i915_gem_ww_ctx ww;
+	int ret;
+
+	if (wa_ctx->indirect_ctx.size == 0)
+		return 0;
+
+	i915_gem_ww_ctx_init(&ww, false);
+retry:
+	i915_gem_object_lock(wa_ctx->indirect_ctx.obj, &ww);
+
+	vma = i915_gem_object_ggtt_pin_ww(wa_ctx->indirect_ctx.obj, &ww, NULL,
+					  0, CACHELINE_BYTES, 0);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		if (ret == -EDEADLK) {
+			ret = i915_gem_ww_ctx_backoff(&ww);
+			if (!ret)
+				goto retry;
+		}
+		return ret;
+	}
+
+	i915_gem_ww_ctx_fini(&ww);
+
+	/* FIXME: we are not tracking our pinned VMA leaving it
+	 * up to the core to fix up the stray pin_count upon
+	 * free.
+	 */
+
+	wa_ctx->indirect_ctx.shadow_gma = i915_ggtt_offset(vma);
+
+	wa_ctx->per_ctx.shadow_gma = *((unsigned int *)per_ctx_va + 1);
+	memset(per_ctx_va, 0, CACHELINE_BYTES);
+
+	update_wa_ctx_2_shadow_ctx(wa_ctx);
+	return 0;
+}
+
+static void update_vreg_in_ctx(struct intel_vgpu_workload *workload)
+{
+	vgpu_vreg_t(workload->vgpu, RING_START(workload->engine->mmio_base)) =
+		workload->rb_start;
+}
+
+static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu_shadow_bb *bb, *pos;
+
+	if (list_empty(&workload->shadow_bb))
+		return;
+
+	bb = list_first_entry(&workload->shadow_bb,
+			struct intel_vgpu_shadow_bb, list);
+
+	list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
+		if (bb->obj) {
+			i915_gem_object_lock(bb->obj, NULL);
+			if (bb->va && !IS_ERR(bb->va))
+				i915_gem_object_unpin_map(bb->obj);
+
+			if (bb->vma && !IS_ERR(bb->vma))
+				i915_vma_unpin(bb->vma);
+
+			i915_gem_object_unlock(bb->obj);
+			i915_gem_object_put(bb->obj);
+		}
+		list_del(&bb->list);
+		kfree(bb);
+	}
+}
+
+static int
+intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_mm *m;
+	int ret = 0;
+
+	ret = intel_vgpu_pin_mm(workload->shadow_mm);
+	if (ret) {
+		gvt_vgpu_err("fail to vgpu pin mm\n");
+		return ret;
+	}
+
+	if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
+	    !workload->shadow_mm->ppgtt_mm.shadowed) {
+		intel_vgpu_unpin_mm(workload->shadow_mm);
+		gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
+		return -EINVAL;
+	}
+
+	if (!list_empty(&workload->lri_shadow_mm)) {
+		list_for_each_entry(m, &workload->lri_shadow_mm,
+				    ppgtt_mm.link) {
+			ret = intel_vgpu_pin_mm(m);
+			if (ret) {
+				list_for_each_entry_from_reverse(m,
+								 &workload->lri_shadow_mm,
+								 ppgtt_mm.link)
+					intel_vgpu_unpin_mm(m);
+				gvt_vgpu_err("LRI shadow ppgtt fail to pin\n");
+				break;
+			}
+		}
+	}
+
+	if (ret)
+		intel_vgpu_unpin_mm(workload->shadow_mm);
+
+	return ret;
+}
+
+static void
+intel_vgpu_shadow_mm_unpin(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu_mm *m;
+
+	if (!list_empty(&workload->lri_shadow_mm)) {
+		list_for_each_entry(m, &workload->lri_shadow_mm,
+				    ppgtt_mm.link)
+			intel_vgpu_unpin_mm(m);
+	}
+	intel_vgpu_unpin_mm(workload->shadow_mm);
+}
+
+static int prepare_workload(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	int ret = 0;
+
+	ret = intel_vgpu_shadow_mm_pin(workload);
+	if (ret) {
+		gvt_vgpu_err("fail to pin shadow mm\n");
+		return ret;
+	}
+
+	update_shadow_pdps(workload);
+
+	set_context_ppgtt_from_shadow(workload, s->shadow[workload->engine->id]);
+
+	ret = intel_vgpu_sync_oos_pages(workload->vgpu);
+	if (ret) {
+		gvt_vgpu_err("fail to vgpu sync oos pages\n");
+		goto err_unpin_mm;
+	}
+
+	ret = intel_vgpu_flush_post_shadow(workload->vgpu);
+	if (ret) {
+		gvt_vgpu_err("fail to flush post shadow\n");
+		goto err_unpin_mm;
+	}
+
+	ret = copy_workload_to_ring_buffer(workload);
+	if (ret) {
+		gvt_vgpu_err("fail to generate request\n");
+		goto err_unpin_mm;
+	}
+
+	ret = prepare_shadow_batch_buffer(workload);
+	if (ret) {
+		gvt_vgpu_err("fail to prepare_shadow_batch_buffer\n");
+		goto err_unpin_mm;
+	}
+
+	ret = prepare_shadow_wa_ctx(&workload->wa_ctx);
+	if (ret) {
+		gvt_vgpu_err("fail to prepare_shadow_wa_ctx\n");
+		goto err_shadow_batch;
+	}
+
+	if (workload->prepare) {
+		ret = workload->prepare(workload);
+		if (ret)
+			goto err_shadow_wa_ctx;
+	}
+
+	return 0;
+err_shadow_wa_ctx:
+	release_shadow_wa_ctx(&workload->wa_ctx);
+err_shadow_batch:
+	release_shadow_batch_buffer(workload);
+err_unpin_mm:
+	intel_vgpu_shadow_mm_unpin(workload);
+	return ret;
+}
+
+static int dispatch_workload(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct i915_request *rq;
+	int ret;
+
+	gvt_dbg_sched("ring id %s prepare to dispatch workload %p\n",
+		      workload->engine->name, workload);
+
+	mutex_lock(&vgpu->vgpu_lock);
+
+	ret = intel_gvt_workload_req_alloc(workload);
+	if (ret)
+		goto err_req;
+
+	ret = intel_gvt_scan_and_shadow_workload(workload);
+	if (ret)
+		goto out;
+
+	ret = populate_shadow_context(workload);
+	if (ret) {
+		release_shadow_wa_ctx(&workload->wa_ctx);
+		goto out;
+	}
+
+	ret = prepare_workload(workload);
+out:
+	if (ret) {
+		/* We might still need to add request with
+		 * clean ctx to retire it properly..
+		 */
+		rq = fetch_and_zero(&workload->req);
+		i915_request_put(rq);
+	}
+
+	if (!IS_ERR_OR_NULL(workload->req)) {
+		gvt_dbg_sched("ring id %s submit workload to i915 %p\n",
+			      workload->engine->name, workload->req);
+		i915_request_add(workload->req);
+		workload->dispatched = true;
+	}
+err_req:
+	if (ret)
+		workload->status = ret;
+	mutex_unlock(&vgpu->vgpu_lock);
+	return ret;
+}
+
+static struct intel_vgpu_workload *
+pick_next_workload(struct intel_gvt *gvt, struct intel_engine_cs *engine)
+{
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	struct intel_vgpu_workload *workload = NULL;
+
+	mutex_lock(&gvt->sched_lock);
+
+	/*
+	 * no current vgpu / will be scheduled out / no workload
+	 * bail out
+	 */
+	if (!scheduler->current_vgpu) {
+		gvt_dbg_sched("ring %s stop - no current vgpu\n", engine->name);
+		goto out;
+	}
+
+	if (scheduler->need_reschedule) {
+		gvt_dbg_sched("ring %s stop - will reschedule\n", engine->name);
+		goto out;
+	}
+
+	if (!test_bit(INTEL_VGPU_STATUS_ACTIVE,
+		      scheduler->current_vgpu->status) ||
+	    list_empty(workload_q_head(scheduler->current_vgpu, engine)))
+		goto out;
+
+	/*
+	 * still have current workload, maybe the workload disptacher
+	 * fail to submit it for some reason, resubmit it.
+	 */
+	if (scheduler->current_workload[engine->id]) {
+		workload = scheduler->current_workload[engine->id];
+		gvt_dbg_sched("ring %s still have current workload %p\n",
+			      engine->name, workload);
+		goto out;
+	}
+
+	/*
+	 * pick a workload as current workload
+	 * once current workload is set, schedule policy routines
+	 * will wait the current workload is finished when trying to
+	 * schedule out a vgpu.
+	 */
+	scheduler->current_workload[engine->id] =
+		list_first_entry(workload_q_head(scheduler->current_vgpu,
+						 engine),
+				 struct intel_vgpu_workload, list);
+
+	workload = scheduler->current_workload[engine->id];
+
+	gvt_dbg_sched("ring %s pick new workload %p\n", engine->name, workload);
+
+	atomic_inc(&workload->vgpu->submission.running_workload_num);
+out:
+	mutex_unlock(&gvt->sched_lock);
+	return workload;
+}
+
+static void update_guest_pdps(struct intel_vgpu *vgpu,
+			      u64 ring_context_gpa, u32 pdp[8])
+{
+	u64 gpa;
+	int i;
+
+	gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val);
+
+	for (i = 0; i < 8; i++)
+		intel_gvt_write_gpa(vgpu, gpa + i * 8, &pdp[7 - i], 4);
+}
+
+static __maybe_unused bool
+check_shadow_context_ppgtt(struct execlist_ring_context *c, struct intel_vgpu_mm *m)
+{
+	if (m->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
+		u64 shadow_pdp = c->pdps[7].val | (u64) c->pdps[6].val << 32;
+
+		if (shadow_pdp != m->ppgtt_mm.shadow_pdps[0]) {
+			gvt_dbg_mm("4-level context ppgtt not match LRI command\n");
+			return false;
+		}
+		return true;
+	} else {
+		/* see comment in LRI handler in cmd_parser.c */
+		gvt_dbg_mm("invalid shadow mm type\n");
+		return false;
+	}
+}
+
+static void update_guest_context(struct intel_vgpu_workload *workload)
+{
+	struct i915_request *rq = workload->req;
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct execlist_ring_context *shadow_ring_context;
+	struct intel_context *ctx = workload->req->context;
+	void *context_base;
+	void *src;
+	unsigned long context_gpa, context_page_num;
+	unsigned long gpa_base; /* first gpa of consecutive GPAs */
+	unsigned long gpa_size; /* size of consecutive GPAs*/
+	int i;
+	u32 ring_base;
+	u32 head, tail;
+	u16 wrap_count;
+
+	gvt_dbg_sched("ring id %d workload lrca %x\n", rq->engine->id,
+		      workload->ctx_desc.lrca);
+
+	GEM_BUG_ON(!intel_context_is_pinned(ctx));
+
+	head = workload->rb_head;
+	tail = workload->rb_tail;
+	wrap_count = workload->guest_rb_head >> RB_HEAD_WRAP_CNT_OFF;
+
+	if (tail < head) {
+		if (wrap_count == RB_HEAD_WRAP_CNT_MAX)
+			wrap_count = 0;
+		else
+			wrap_count += 1;
+	}
+
+	head = (wrap_count << RB_HEAD_WRAP_CNT_OFF) | tail;
+
+	ring_base = rq->engine->mmio_base;
+	vgpu_vreg_t(vgpu, RING_TAIL(ring_base)) = tail;
+	vgpu_vreg_t(vgpu, RING_HEAD(ring_base)) = head;
+
+	context_page_num = rq->engine->context_size;
+	context_page_num = context_page_num >> PAGE_SHIFT;
+
+	if (IS_BROADWELL(rq->i915) && rq->engine->id == RCS0)
+		context_page_num = 19;
+
+	context_base = (void *) ctx->lrc_reg_state -
+			(LRC_STATE_PN << I915_GTT_PAGE_SHIFT);
+
+	/* find consecutive GPAs from gma until the first inconsecutive GPA.
+	 * write to the consecutive GPAs from src virtual address
+	 */
+	gpa_size = 0;
+	for (i = 2; i < context_page_num; i++) {
+		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
+				(u32)((workload->ctx_desc.lrca + i) <<
+					I915_GTT_PAGE_SHIFT));
+		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
+			gvt_vgpu_err("invalid guest context descriptor\n");
+			return;
+		}
+
+		if (gpa_size == 0) {
+			gpa_base = context_gpa;
+			src = context_base + (i << I915_GTT_PAGE_SHIFT);
+		} else if (context_gpa != gpa_base + gpa_size)
+			goto write;
+
+		gpa_size += I915_GTT_PAGE_SIZE;
+
+		if (i == context_page_num - 1)
+			goto write;
+
+		continue;
+
+write:
+		intel_gvt_write_gpa(vgpu, gpa_base, src, gpa_size);
+		gpa_base = context_gpa;
+		gpa_size = I915_GTT_PAGE_SIZE;
+		src = context_base + (i << I915_GTT_PAGE_SHIFT);
+	}
+
+	intel_gvt_write_gpa(vgpu, workload->ring_context_gpa +
+		RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);
+
+	shadow_ring_context = (void *) ctx->lrc_reg_state;
+
+	if (!list_empty(&workload->lri_shadow_mm)) {
+		struct intel_vgpu_mm *m = list_last_entry(&workload->lri_shadow_mm,
+							  struct intel_vgpu_mm,
+							  ppgtt_mm.link);
+		GEM_BUG_ON(!check_shadow_context_ppgtt(shadow_ring_context, m));
+		update_guest_pdps(vgpu, workload->ring_context_gpa,
+				  (void *)m->ppgtt_mm.guest_pdps);
+	}
+
+#define COPY_REG(name) \
+	intel_gvt_write_gpa(vgpu, workload->ring_context_gpa + \
+		RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)
+
+	COPY_REG(ctx_ctrl);
+	COPY_REG(ctx_timestamp);
+
+#undef COPY_REG
+
+	intel_gvt_write_gpa(vgpu,
+			workload->ring_context_gpa +
+			sizeof(*shadow_ring_context),
+			(void *)shadow_ring_context +
+			sizeof(*shadow_ring_context),
+			I915_GTT_PAGE_SIZE - sizeof(*shadow_ring_context));
+}
+
+void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
+				intel_engine_mask_t engine_mask)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	struct intel_vgpu_workload *pos, *n;
+	intel_engine_mask_t tmp;
+
+	/* free the unsubmited workloads in the queues. */
+	for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) {
+		list_for_each_entry_safe(pos, n,
+			&s->workload_q_head[engine->id], list) {
+			list_del_init(&pos->list);
+			intel_vgpu_destroy_workload(pos);
+		}
+		clear_bit(engine->id, s->shadow_ctx_desc_updated);
+	}
+}
+
+static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
+{
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	struct intel_vgpu_workload *workload =
+		scheduler->current_workload[ring_id];
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct i915_request *rq = workload->req;
+	int event;
+
+	mutex_lock(&vgpu->vgpu_lock);
+	mutex_lock(&gvt->sched_lock);
+
+	/* For the workload w/ request, needs to wait for the context
+	 * switch to make sure request is completed.
+	 * For the workload w/o request, directly complete the workload.
+	 */
+	if (rq) {
+		wait_event(workload->shadow_ctx_status_wq,
+			   !atomic_read(&workload->shadow_ctx_active));
+
+		/* If this request caused GPU hang, req->fence.error will
+		 * be set to -EIO. Use -EIO to set workload status so
+		 * that when this request caused GPU hang, didn't trigger
+		 * context switch interrupt to guest.
+		 */
+		if (likely(workload->status == -EINPROGRESS)) {
+			if (workload->req->fence.error == -EIO)
+				workload->status = -EIO;
+			else
+				workload->status = 0;
+		}
+
+		if (!workload->status &&
+		    !(vgpu->resetting_eng & BIT(ring_id))) {
+			update_guest_context(workload);
+
+			for_each_set_bit(event, workload->pending_events,
+					 INTEL_GVT_EVENT_MAX)
+				intel_vgpu_trigger_virtual_event(vgpu, event);
+		}
+
+		i915_request_put(fetch_and_zero(&workload->req));
+	}
+
+	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
+			ring_id, workload, workload->status);
+
+	scheduler->current_workload[ring_id] = NULL;
+
+	list_del_init(&workload->list);
+
+	if (workload->status || vgpu->resetting_eng & BIT(ring_id)) {
+		/* if workload->status is not successful means HW GPU
+		 * has occurred GPU hang or something wrong with i915/GVT,
+		 * and GVT won't inject context switch interrupt to guest.
+		 * So this error is a vGPU hang actually to the guest.
+		 * According to this we should emunlate a vGPU hang. If
+		 * there are pending workloads which are already submitted
+		 * from guest, we should clean them up like HW GPU does.
+		 *
+		 * if it is in middle of engine resetting, the pending
+		 * workloads won't be submitted to HW GPU and will be
+		 * cleaned up during the resetting process later, so doing
+		 * the workload clean up here doesn't have any impact.
+		 **/
+		intel_vgpu_clean_workloads(vgpu, BIT(ring_id));
+	}
+
+	workload->complete(workload);
+
+	intel_vgpu_shadow_mm_unpin(workload);
+	intel_vgpu_destroy_workload(workload);
+
+	atomic_dec(&s->running_workload_num);
+	wake_up(&scheduler->workload_complete_wq);
+
+	if (gvt->scheduler.need_reschedule)
+		intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);
+
+	mutex_unlock(&gvt->sched_lock);
+	mutex_unlock(&vgpu->vgpu_lock);
+}
+
+static int workload_thread(void *arg)
+{
+	struct intel_engine_cs *engine = arg;
+	const bool need_force_wake = GRAPHICS_VER(engine->i915) >= 9;
+	struct intel_gvt *gvt = engine->i915->gvt;
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	struct intel_vgpu_workload *workload = NULL;
+	struct intel_vgpu *vgpu = NULL;
+	int ret;
+	DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+	gvt_dbg_core("workload thread for ring %s started\n", engine->name);
+
+	while (!kthread_should_stop()) {
+		intel_wakeref_t wakeref;
+
+		add_wait_queue(&scheduler->waitq[engine->id], &wait);
+		do {
+			workload = pick_next_workload(gvt, engine);
+			if (workload)
+				break;
+			wait_woken(&wait, TASK_INTERRUPTIBLE,
+				   MAX_SCHEDULE_TIMEOUT);
+		} while (!kthread_should_stop());
+		remove_wait_queue(&scheduler->waitq[engine->id], &wait);
+
+		if (!workload)
+			break;
+
+		gvt_dbg_sched("ring %s next workload %p vgpu %d\n",
+			      engine->name, workload,
+			      workload->vgpu->id);
+
+		wakeref = intel_runtime_pm_get(engine->uncore->rpm);
+
+		gvt_dbg_sched("ring %s will dispatch workload %p\n",
+			      engine->name, workload);
+
+		if (need_force_wake)
+			intel_uncore_forcewake_get(engine->uncore,
+						   FORCEWAKE_ALL);
+		/*
+		 * Update the vReg of the vGPU which submitted this
+		 * workload. The vGPU may use these registers for checking
+		 * the context state. The value comes from GPU commands
+		 * in this workload.
+		 */
+		update_vreg_in_ctx(workload);
+
+		ret = dispatch_workload(workload);
+
+		if (ret) {
+			vgpu = workload->vgpu;
+			gvt_vgpu_err("fail to dispatch workload, skip\n");
+			goto complete;
+		}
+
+		gvt_dbg_sched("ring %s wait workload %p\n",
+			      engine->name, workload);
+		i915_request_wait(workload->req, 0, MAX_SCHEDULE_TIMEOUT);
+
+complete:
+		gvt_dbg_sched("will complete workload %p, status: %d\n",
+			      workload, workload->status);
+
+		complete_current_workload(gvt, engine->id);
+
+		if (need_force_wake)
+			intel_uncore_forcewake_put(engine->uncore,
+						   FORCEWAKE_ALL);
+
+		intel_runtime_pm_put(engine->uncore->rpm, wakeref);
+		if (ret && (vgpu_is_vm_unhealthy(ret)))
+			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
+	}
+	return 0;
+}
+
+void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+
+	if (atomic_read(&s->running_workload_num)) {
+		gvt_dbg_sched("wait vgpu idle\n");
+
+		wait_event(scheduler->workload_complete_wq,
+				!atomic_read(&s->running_workload_num));
+	}
+}
+
+void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt)
+{
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id i;
+
+	gvt_dbg_core("clean workload scheduler\n");
+
+	for_each_engine(engine, gvt->gt, i) {
+		atomic_notifier_chain_unregister(
+					&engine->context_status_notifier,
+					&gvt->shadow_ctx_notifier_block[i]);
+		kthread_stop(scheduler->thread[i]);
+	}
+}
+
+int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
+{
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id i;
+	int ret;
+
+	gvt_dbg_core("init workload scheduler\n");
+
+	init_waitqueue_head(&scheduler->workload_complete_wq);
+
+	for_each_engine(engine, gvt->gt, i) {
+		init_waitqueue_head(&scheduler->waitq[i]);
+
+		scheduler->thread[i] = kthread_run(workload_thread, engine,
+						   "gvt:%s", engine->name);
+		if (IS_ERR(scheduler->thread[i])) {
+			gvt_err("fail to create workload thread\n");
+			ret = PTR_ERR(scheduler->thread[i]);
+			goto err;
+		}
+
+		gvt->shadow_ctx_notifier_block[i].notifier_call =
+					shadow_context_status_change;
+		atomic_notifier_chain_register(&engine->context_status_notifier,
+					&gvt->shadow_ctx_notifier_block[i]);
+	}
+
+	return 0;
+
+err:
+	intel_gvt_clean_workload_scheduler(gvt);
+	return ret;
+}
+
+static void
+i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s,
+				struct i915_ppgtt *ppgtt)
+{
+	int i;
+
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		set_dma_address(ppgtt->pd, s->i915_context_pml4);
+	} else {
+		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+			struct i915_page_directory * const pd =
+				i915_pd_entry(ppgtt->pd, i);
+
+			set_dma_address(pd, s->i915_context_pdps[i]);
+		}
+	}
+}
+
+/**
+ * intel_vgpu_clean_submission - free submission-related resource for vGPU
+ * @vgpu: a vGPU
+ *
+ * This function is called when a vGPU is being destroyed.
+ *
+ */
+void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
+
+	i915_context_ppgtt_root_restore(s, i915_vm_to_ppgtt(s->shadow[0]->vm));
+	for_each_engine(engine, vgpu->gvt->gt, id)
+		intel_context_put(s->shadow[id]);
+
+	kmem_cache_destroy(s->workloads);
+}
+
+
+/**
+ * intel_vgpu_reset_submission - reset submission-related resource for vGPU
+ * @vgpu: a vGPU
+ * @engine_mask: engines expected to be reset
+ *
+ * This function is called when a vGPU is being destroyed.
+ *
+ */
+void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
+				 intel_engine_mask_t engine_mask)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+
+	if (!s->active)
+		return;
+
+	intel_vgpu_clean_workloads(vgpu, engine_mask);
+	s->ops->reset(vgpu, engine_mask);
+}
+
+static void
+i915_context_ppgtt_root_save(struct intel_vgpu_submission *s,
+			     struct i915_ppgtt *ppgtt)
+{
+	int i;
+
+	if (i915_vm_is_4lvl(&ppgtt->vm)) {
+		s->i915_context_pml4 = px_dma(ppgtt->pd);
+	} else {
+		for (i = 0; i < GEN8_3LVL_PDPES; i++) {
+			struct i915_page_directory * const pd =
+				i915_pd_entry(ppgtt->pd, i);
+
+			s->i915_context_pdps[i] = px_dma(pd);
+		}
+	}
+}
+
+/**
+ * intel_vgpu_setup_submission - setup submission-related resource for vGPU
+ * @vgpu: a vGPU
+ *
+ * This function is called when a vGPU is being created.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_engine_cs *engine;
+	struct i915_ppgtt *ppgtt;
+	enum intel_engine_id i;
+	int ret;
+
+	ppgtt = i915_ppgtt_create(to_gt(i915), I915_BO_ALLOC_PM_EARLY);
+	if (IS_ERR(ppgtt))
+		return PTR_ERR(ppgtt);
+
+	i915_context_ppgtt_root_save(s, ppgtt);
+
+	for_each_engine(engine, vgpu->gvt->gt, i) {
+		struct intel_context *ce;
+
+		INIT_LIST_HEAD(&s->workload_q_head[i]);
+		s->shadow[i] = ERR_PTR(-EINVAL);
+
+		ce = intel_context_create(engine);
+		if (IS_ERR(ce)) {
+			ret = PTR_ERR(ce);
+			goto out_shadow_ctx;
+		}
+
+		i915_vm_put(ce->vm);
+		ce->vm = i915_vm_get(&ppgtt->vm);
+		intel_context_set_single_submission(ce);
+
+		/* Max ring buffer size */
+		if (!intel_uc_wants_guc_submission(&engine->gt->uc))
+			ce->ring_size = SZ_2M;
+
+		s->shadow[i] = ce;
+	}
+
+	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
+
+	s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
+						  sizeof(struct intel_vgpu_workload), 0,
+						  SLAB_HWCACHE_ALIGN,
+						  offsetof(struct intel_vgpu_workload, rb_tail),
+						  sizeof_field(struct intel_vgpu_workload, rb_tail),
+						  NULL);
+
+	if (!s->workloads) {
+		ret = -ENOMEM;
+		goto out_shadow_ctx;
+	}
+
+	atomic_set(&s->running_workload_num, 0);
+	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);
+
+	memset(s->last_ctx, 0, sizeof(s->last_ctx));
+
+	i915_vm_put(&ppgtt->vm);
+	return 0;
+
+out_shadow_ctx:
+	i915_context_ppgtt_root_restore(s, ppgtt);
+	for_each_engine(engine, vgpu->gvt->gt, i) {
+		if (IS_ERR(s->shadow[i]))
+			break;
+
+		intel_context_put(s->shadow[i]);
+	}
+	i915_vm_put(&ppgtt->vm);
+	return ret;
+}
+
+/**
+ * intel_vgpu_select_submission_ops - select virtual submission interface
+ * @vgpu: a vGPU
+ * @engine_mask: either ALL_ENGINES or target engine mask
+ * @interface: expected vGPU virtual submission interface
+ *
+ * This function is called when guest configures submission interface.
+ *
+ * Returns:
+ * Zero on success, negative error code if failed.
+ *
+ */
+int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
+				     intel_engine_mask_t engine_mask,
+				     unsigned int interface)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	const struct intel_vgpu_submission_ops *ops[] = {
+		[INTEL_VGPU_EXECLIST_SUBMISSION] =
+			&intel_vgpu_execlist_submission_ops,
+	};
+	int ret;
+
+	if (drm_WARN_ON(&i915->drm, interface >= ARRAY_SIZE(ops)))
+		return -EINVAL;
+
+	if (drm_WARN_ON(&i915->drm,
+			interface == 0 && engine_mask != ALL_ENGINES))
+		return -EINVAL;
+
+	if (s->active)
+		s->ops->clean(vgpu, engine_mask);
+
+	if (interface == 0) {
+		s->ops = NULL;
+		s->virtual_submission_interface = 0;
+		s->active = false;
+		gvt_dbg_core("vgpu%d: remove submission ops\n", vgpu->id);
+		return 0;
+	}
+
+	ret = ops[interface]->init(vgpu, engine_mask);
+	if (ret)
+		return ret;
+
+	s->ops = ops[interface];
+	s->virtual_submission_interface = interface;
+	s->active = true;
+
+	gvt_dbg_core("vgpu%d: activate ops [ %s ]\n",
+			vgpu->id, s->ops->name);
+
+	return 0;
+}
+
+/**
+ * intel_vgpu_destroy_workload - destroy a vGPU workload
+ * @workload: workload to destroy
+ *
+ * This function is called when destroy a vGPU workload.
+ *
+ */
+void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu_submission *s = &workload->vgpu->submission;
+
+	intel_context_unpin(s->shadow[workload->engine->id]);
+	release_shadow_batch_buffer(workload);
+	release_shadow_wa_ctx(&workload->wa_ctx);
+
+	if (!list_empty(&workload->lri_shadow_mm)) {
+		struct intel_vgpu_mm *m, *mm;
+		list_for_each_entry_safe(m, mm, &workload->lri_shadow_mm,
+					 ppgtt_mm.link) {
+			list_del(&m->ppgtt_mm.link);
+			intel_vgpu_mm_put(m);
+		}
+	}
+
+	GEM_BUG_ON(!list_empty(&workload->lri_shadow_mm));
+	if (workload->shadow_mm)
+		intel_vgpu_mm_put(workload->shadow_mm);
+
+	kmem_cache_free(s->workloads, workload);
+}
+
+static struct intel_vgpu_workload *
+alloc_workload(struct intel_vgpu *vgpu)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct intel_vgpu_workload *workload;
+
+	workload = kmem_cache_zalloc(s->workloads, GFP_KERNEL);
+	if (!workload)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&workload->list);
+	INIT_LIST_HEAD(&workload->shadow_bb);
+	INIT_LIST_HEAD(&workload->lri_shadow_mm);
+
+	init_waitqueue_head(&workload->shadow_ctx_status_wq);
+	atomic_set(&workload->shadow_ctx_active, 0);
+
+	workload->status = -EINPROGRESS;
+	workload->vgpu = vgpu;
+
+	return workload;
+}
+
+#define RING_CTX_OFF(x) \
+	offsetof(struct execlist_ring_context, x)
+
+static void read_guest_pdps(struct intel_vgpu *vgpu,
+		u64 ring_context_gpa, u32 pdp[8])
+{
+	u64 gpa;
+	int i;
+
+	gpa = ring_context_gpa + RING_CTX_OFF(pdps[0].val);
+
+	for (i = 0; i < 8; i++)
+		intel_gvt_read_gpa(vgpu,
+				gpa + i * 8, &pdp[7 - i], 4);
+}
+
+static int prepare_mm(struct intel_vgpu_workload *workload)
+{
+	struct execlist_ctx_descriptor_format *desc = &workload->ctx_desc;
+	struct intel_vgpu_mm *mm;
+	struct intel_vgpu *vgpu = workload->vgpu;
+	enum intel_gvt_gtt_type root_entry_type;
+	u64 pdps[GVT_RING_CTX_NR_PDPS];
+
+	switch (desc->addressing_mode) {
+	case 1: /* legacy 32-bit */
+		root_entry_type = GTT_TYPE_PPGTT_ROOT_L3_ENTRY;
+		break;
+	case 3: /* legacy 64-bit */
+		root_entry_type = GTT_TYPE_PPGTT_ROOT_L4_ENTRY;
+		break;
+	default:
+		gvt_vgpu_err("Advanced Context mode(SVM) is not supported!\n");
+		return -EINVAL;
+	}
+
+	read_guest_pdps(workload->vgpu, workload->ring_context_gpa, (void *)pdps);
+
+	mm = intel_vgpu_get_ppgtt_mm(workload->vgpu, root_entry_type, pdps);
+	if (IS_ERR(mm))
+		return PTR_ERR(mm);
+
+	workload->shadow_mm = mm;
+	return 0;
+}
+
+#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
+		((a)->lrca == (b)->lrca))
+
+/**
+ * intel_vgpu_create_workload - create a vGPU workload
+ * @vgpu: a vGPU
+ * @engine: the engine
+ * @desc: a guest context descriptor
+ *
+ * This function is called when creating a vGPU workload.
+ *
+ * Returns:
+ * struct intel_vgpu_workload * on success, negative error code in
+ * pointer if failed.
+ *
+ */
+struct intel_vgpu_workload *
+intel_vgpu_create_workload(struct intel_vgpu *vgpu,
+			   const struct intel_engine_cs *engine,
+			   struct execlist_ctx_descriptor_format *desc)
+{
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct list_head *q = workload_q_head(vgpu, engine);
+	struct intel_vgpu_workload *last_workload = NULL;
+	struct intel_vgpu_workload *workload = NULL;
+	u64 ring_context_gpa;
+	u32 head, tail, start, ctl, ctx_ctl, per_ctx, indirect_ctx;
+	u32 guest_head;
+	int ret;
+
+	ring_context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
+			(u32)((desc->lrca + 1) << I915_GTT_PAGE_SHIFT));
+	if (ring_context_gpa == INTEL_GVT_INVALID_ADDR) {
+		gvt_vgpu_err("invalid guest context LRCA: %x\n", desc->lrca);
+		return ERR_PTR(-EINVAL);
+	}
+
+	intel_gvt_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(ring_header.val), &head, 4);
+
+	intel_gvt_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(ring_tail.val), &tail, 4);
+
+	guest_head = head;
+
+	head &= RB_HEAD_OFF_MASK;
+	tail &= RB_TAIL_OFF_MASK;
+
+	list_for_each_entry_reverse(last_workload, q, list) {
+
+		if (same_context(&last_workload->ctx_desc, desc)) {
+			gvt_dbg_el("ring %s cur workload == last\n",
+				   engine->name);
+			gvt_dbg_el("ctx head %x real head %lx\n", head,
+				   last_workload->rb_tail);
+			/*
+			 * cannot use guest context head pointer here,
+			 * as it might not be updated at this time
+			 */
+			head = last_workload->rb_tail;
+			break;
+		}
+	}
+
+	gvt_dbg_el("ring %s begin a new workload\n", engine->name);
+
+	/* record some ring buffer register values for scan and shadow */
+	intel_gvt_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(rb_start.val), &start, 4);
+	intel_gvt_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(rb_ctrl.val), &ctl, 4);
+	intel_gvt_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(ctx_ctrl.val), &ctx_ctl, 4);
+
+	if (!intel_gvt_ggtt_validate_range(vgpu, start,
+				_RING_CTL_BUF_SIZE(ctl))) {
+		gvt_vgpu_err("context contain invalid rb at: 0x%x\n", start);
+		return ERR_PTR(-EINVAL);
+	}
+
+	workload = alloc_workload(vgpu);
+	if (IS_ERR(workload))
+		return workload;
+
+	workload->engine = engine;
+	workload->ctx_desc = *desc;
+	workload->ring_context_gpa = ring_context_gpa;
+	workload->rb_head = head;
+	workload->guest_rb_head = guest_head;
+	workload->rb_tail = tail;
+	workload->rb_start = start;
+	workload->rb_ctl = ctl;
+
+	if (engine->id == RCS0) {
+		intel_gvt_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(bb_per_ctx_ptr.val), &per_ctx, 4);
+		intel_gvt_read_gpa(vgpu, ring_context_gpa +
+			RING_CTX_OFF(rcs_indirect_ctx.val), &indirect_ctx, 4);
+
+		workload->wa_ctx.indirect_ctx.guest_gma =
+			indirect_ctx & INDIRECT_CTX_ADDR_MASK;
+		workload->wa_ctx.indirect_ctx.size =
+			(indirect_ctx & INDIRECT_CTX_SIZE_MASK) *
+			CACHELINE_BYTES;
+
+		if (workload->wa_ctx.indirect_ctx.size != 0) {
+			if (!intel_gvt_ggtt_validate_range(vgpu,
+				workload->wa_ctx.indirect_ctx.guest_gma,
+				workload->wa_ctx.indirect_ctx.size)) {
+				gvt_vgpu_err("invalid wa_ctx at: 0x%lx\n",
+				    workload->wa_ctx.indirect_ctx.guest_gma);
+				kmem_cache_free(s->workloads, workload);
+				return ERR_PTR(-EINVAL);
+			}
+		}
+
+		workload->wa_ctx.per_ctx.guest_gma =
+			per_ctx & PER_CTX_ADDR_MASK;
+		workload->wa_ctx.per_ctx.valid = per_ctx & 1;
+		if (workload->wa_ctx.per_ctx.valid) {
+			if (!intel_gvt_ggtt_validate_range(vgpu,
+				workload->wa_ctx.per_ctx.guest_gma,
+				CACHELINE_BYTES)) {
+				gvt_vgpu_err("invalid per_ctx at: 0x%lx\n",
+					workload->wa_ctx.per_ctx.guest_gma);
+				kmem_cache_free(s->workloads, workload);
+				return ERR_PTR(-EINVAL);
+			}
+		}
+	}
+
+	gvt_dbg_el("workload %p ring %s head %x tail %x start %x ctl %x\n",
+		   workload, engine->name, head, tail, start, ctl);
+
+	ret = prepare_mm(workload);
+	if (ret) {
+		kmem_cache_free(s->workloads, workload);
+		return ERR_PTR(ret);
+	}
+
+	/* Only scan and shadow the first workload in the queue
+	 * as there is only one pre-allocated buf-obj for shadow.
+	 */
+	if (list_empty(q)) {
+		intel_wakeref_t wakeref;
+
+		with_intel_runtime_pm(engine->gt->uncore->rpm, wakeref)
+			ret = intel_gvt_scan_and_shadow_workload(workload);
+	}
+
+	if (ret) {
+		if (vgpu_is_vm_unhealthy(ret))
+			enter_failsafe_mode(vgpu, GVT_FAILSAFE_GUEST_ERR);
+		intel_vgpu_destroy_workload(workload);
+		return ERR_PTR(ret);
+	}
+
+	ret = intel_context_pin(s->shadow[engine->id]);
+	if (ret) {
+		intel_vgpu_destroy_workload(workload);
+		return ERR_PTR(ret);
+	}
+
+	return workload;
+}
+
+/**
+ * intel_vgpu_queue_workload - Qeue a vGPU workload
+ * @workload: the workload to queue in
+ */
+void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload)
+{
+	list_add_tail(&workload->list,
+		      workload_q_head(workload->vgpu, workload->engine));
+	intel_gvt_kick_schedule(workload->vgpu->gvt);
+	wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->engine->id]);
+}
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
new file mode 100644
index 0000000000..1f391b3da2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ * Contributors:
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Tina Zhang <tina.zhang@intel.com>
+ *    Chanbin Du <changbin.du@intel.com>
+ *    Min He <min.he@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *    Zhenyu Wang <zhenyuw@linux.intel.com>
+ *
+ */
+
+#ifndef _GVT_SCHEDULER_H_
+#define _GVT_SCHEDULER_H_
+
+#include "gt/intel_engine_types.h"
+
+#include "execlist.h"
+#include "interrupt.h"
+
+struct intel_gvt_workload_scheduler {
+	struct intel_vgpu *current_vgpu;
+	struct intel_vgpu *next_vgpu;
+	struct intel_vgpu_workload *current_workload[I915_NUM_ENGINES];
+	bool need_reschedule;
+
+	spinlock_t mmio_context_lock;
+	/* can be null when owner is host */
+	struct intel_vgpu *engine_owner[I915_NUM_ENGINES];
+
+	wait_queue_head_t workload_complete_wq;
+	struct task_struct *thread[I915_NUM_ENGINES];
+	wait_queue_head_t waitq[I915_NUM_ENGINES];
+
+	void *sched_data;
+	const struct intel_gvt_sched_policy_ops *sched_ops;
+};
+
+#define INDIRECT_CTX_ADDR_MASK 0xffffffc0
+#define INDIRECT_CTX_SIZE_MASK 0x3f
+struct shadow_indirect_ctx {
+	struct drm_i915_gem_object *obj;
+	unsigned long guest_gma;
+	unsigned long shadow_gma;
+	void *shadow_va;
+	u32 size;
+};
+
+#define PER_CTX_ADDR_MASK 0xfffff000
+struct shadow_per_ctx {
+	unsigned long guest_gma;
+	unsigned long shadow_gma;
+	unsigned valid;
+};
+
+struct intel_shadow_wa_ctx {
+	struct shadow_indirect_ctx indirect_ctx;
+	struct shadow_per_ctx per_ctx;
+
+};
+
+struct intel_vgpu_workload {
+	struct intel_vgpu *vgpu;
+	const struct intel_engine_cs *engine;
+	struct i915_request *req;
+	/* if this workload has been dispatched to i915? */
+	bool dispatched;
+	bool shadow;      /* if workload has done shadow of guest request */
+	int status;
+
+	struct intel_vgpu_mm *shadow_mm;
+	struct list_head lri_shadow_mm; /* For PPGTT load cmd */
+
+	/* different submission model may need different handler */
+	int (*prepare)(struct intel_vgpu_workload *);
+	int (*complete)(struct intel_vgpu_workload *);
+	struct list_head list;
+
+	DECLARE_BITMAP(pending_events, INTEL_GVT_EVENT_MAX);
+	void *shadow_ring_buffer_va;
+
+	/* execlist context information */
+	struct execlist_ctx_descriptor_format ctx_desc;
+	struct execlist_ring_context *ring_context;
+	unsigned long rb_head, rb_tail, rb_ctl, rb_start, rb_len;
+	unsigned long guest_rb_head;
+	bool restore_inhibit;
+	struct intel_vgpu_elsp_dwords elsp_dwords;
+	bool emulate_schedule_in;
+	atomic_t shadow_ctx_active;
+	wait_queue_head_t shadow_ctx_status_wq;
+	u64 ring_context_gpa;
+
+	/* shadow batch buffer */
+	struct list_head shadow_bb;
+	struct intel_shadow_wa_ctx wa_ctx;
+
+	/* oa registers */
+	u32 oactxctrl;
+	u32 flex_mmio[7];
+};
+
+struct intel_vgpu_shadow_bb {
+	struct list_head list;
+	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	void *va;
+	u32 *bb_start_cmd_va;
+	unsigned long bb_offset;
+	bool ppgtt;
+};
+
+#define workload_q_head(vgpu, e) \
+	(&(vgpu)->submission.workload_q_head[(e)->id])
+
+void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload);
+
+int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt);
+
+void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt);
+
+void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu);
+
+int intel_vgpu_setup_submission(struct intel_vgpu *vgpu);
+
+void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
+				 intel_engine_mask_t engine_mask);
+
+void intel_vgpu_clean_submission(struct intel_vgpu *vgpu);
+
+int intel_vgpu_select_submission_ops(struct intel_vgpu *vgpu,
+				     intel_engine_mask_t engine_mask,
+				     unsigned int interface);
+
+extern const struct intel_vgpu_submission_ops
+intel_vgpu_execlist_submission_ops;
+
+struct intel_vgpu_workload *
+intel_vgpu_create_workload(struct intel_vgpu *vgpu,
+			   const struct intel_engine_cs *engine,
+			   struct execlist_ctx_descriptor_format *desc);
+
+void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload);
+
+void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
+				intel_engine_mask_t engine_mask);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h
new file mode 100644
index 0000000000..020f1aa283
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/trace.h
@@ -0,0 +1,383 @@
+/*
+ * Copyright © 2011-2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jike Song <jike.song@intel.com>
+ *
+ * Contributors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#if !defined(_GVT_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _GVT_TRACE_H_
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+#include <linux/tracepoint.h>
+#include <asm/tsc.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gvt
+
+TRACE_EVENT(spt_alloc,
+	TP_PROTO(int id, void *spt, int type, unsigned long mfn,
+		unsigned long gpt_gfn),
+
+	TP_ARGS(id, spt, type, mfn, gpt_gfn),
+
+	TP_STRUCT__entry(
+		__field(int, id)
+		__field(void *, spt)
+		__field(int, type)
+		__field(unsigned long, mfn)
+		__field(unsigned long, gpt_gfn)
+		),
+
+	TP_fast_assign(
+		__entry->id = id;
+		__entry->spt = spt;
+		__entry->type = type;
+		__entry->mfn = mfn;
+		__entry->gpt_gfn = gpt_gfn;
+	),
+
+	TP_printk("VM%d [alloc] spt %p type %d mfn 0x%lx gfn 0x%lx\n",
+		__entry->id,
+		__entry->spt,
+		__entry->type,
+		__entry->mfn,
+		__entry->gpt_gfn)
+);
+
+TRACE_EVENT(spt_free,
+	TP_PROTO(int id, void *spt, int type),
+
+	TP_ARGS(id, spt, type),
+
+	TP_STRUCT__entry(
+		__field(int, id)
+		__field(void *, spt)
+		__field(int, type)
+		),
+
+	TP_fast_assign(
+		__entry->id = id;
+		__entry->spt = spt;
+		__entry->type = type;
+	),
+
+	TP_printk("VM%u [free] spt %p type %d\n",
+		__entry->id,
+		__entry->spt,
+		__entry->type)
+);
+
+#define MAX_BUF_LEN 256
+
+TRACE_EVENT(gma_index,
+	TP_PROTO(const char *prefix, unsigned long gma,
+		unsigned long index),
+
+	TP_ARGS(prefix, gma, index),
+
+	TP_STRUCT__entry(
+		__array(char, buf, MAX_BUF_LEN)
+	),
+
+	TP_fast_assign(
+		snprintf(__entry->buf, MAX_BUF_LEN,
+			"%s gma 0x%lx index 0x%lx\n", prefix, gma, index);
+	),
+
+	TP_printk("%s", __entry->buf)
+);
+
+TRACE_EVENT(gma_translate,
+	TP_PROTO(int id, char *type, int ring_id, int root_entry_type,
+		unsigned long gma, unsigned long gpa),
+
+	TP_ARGS(id, type, ring_id, root_entry_type, gma, gpa),
+
+	TP_STRUCT__entry(
+		__array(char, buf, MAX_BUF_LEN)
+	),
+
+	TP_fast_assign(
+		snprintf(__entry->buf, MAX_BUF_LEN,
+			"VM%d %s ring %d root_entry_type %d gma 0x%lx -> gpa 0x%lx\n",
+			id, type, ring_id, root_entry_type, gma, gpa);
+	),
+
+	TP_printk("%s", __entry->buf)
+);
+
+TRACE_EVENT(spt_refcount,
+	TP_PROTO(int id, char *action, void *spt, int before, int after),
+
+	TP_ARGS(id, action, spt, before, after),
+
+	TP_STRUCT__entry(
+		__array(char, buf, MAX_BUF_LEN)
+	),
+
+	TP_fast_assign(
+		snprintf(__entry->buf, MAX_BUF_LEN,
+			"VM%d [%s] spt %p before %d -> after %d\n",
+				id, action, spt, before, after);
+	),
+
+	TP_printk("%s", __entry->buf)
+);
+
+TRACE_EVENT(spt_change,
+	TP_PROTO(int id, char *action, void *spt, unsigned long gfn,
+		int type),
+
+	TP_ARGS(id, action, spt, gfn, type),
+
+	TP_STRUCT__entry(
+		__array(char, buf, MAX_BUF_LEN)
+	),
+
+	TP_fast_assign(
+		snprintf(__entry->buf, MAX_BUF_LEN,
+			"VM%d [%s] spt %p gfn 0x%lx type %d\n",
+				id, action, spt, gfn, type);
+	),
+
+	TP_printk("%s", __entry->buf)
+);
+
+TRACE_EVENT(spt_guest_change,
+	TP_PROTO(int id, const char *tag, void *spt, int type, u64 v,
+		unsigned long index),
+
+	TP_ARGS(id, tag, spt, type, v, index),
+
+	TP_STRUCT__entry(
+		__array(char, buf, MAX_BUF_LEN)
+	),
+
+	TP_fast_assign(
+		snprintf(__entry->buf, MAX_BUF_LEN,
+		"VM%d [%s] spt %p type %d entry 0x%llx index 0x%lx\n",
+			id, tag, spt, type, v, index);
+	),
+
+	TP_printk("%s", __entry->buf)
+);
+
+TRACE_EVENT(oos_change,
+	TP_PROTO(int id, const char *tag, int page_id, void *gpt, int type),
+
+	TP_ARGS(id, tag, page_id, gpt, type),
+
+	TP_STRUCT__entry(
+		__array(char, buf, MAX_BUF_LEN)
+	),
+
+	TP_fast_assign(
+		snprintf(__entry->buf, MAX_BUF_LEN,
+		"VM%d [oos %s] page id %d gpt %p type %d\n",
+			id, tag, page_id, gpt, type);
+	),
+
+	TP_printk("%s", __entry->buf)
+);
+
+TRACE_EVENT(oos_sync,
+	TP_PROTO(int id, int page_id, void *gpt, int type, u64 v,
+		unsigned long index),
+
+	TP_ARGS(id, page_id, gpt, type, v, index),
+
+	TP_STRUCT__entry(
+		__array(char, buf, MAX_BUF_LEN)
+	),
+
+	TP_fast_assign(
+	snprintf(__entry->buf, MAX_BUF_LEN,
+	"VM%d [oos sync] page id %d gpt %p type %d entry 0x%llx index 0x%lx\n",
+				id, page_id, gpt, type, v, index);
+	),
+
+	TP_printk("%s", __entry->buf)
+);
+
+#define GVT_CMD_STR_LEN 40
+TRACE_EVENT(gvt_command,
+	TP_PROTO(u8 vgpu_id, u8 ring_id, u32 ip_gma, u32 *cmd_va,
+		u32 cmd_len,  u32 buf_type, u32 buf_addr_type,
+		void *workload, const char *cmd_name),
+
+	TP_ARGS(vgpu_id, ring_id, ip_gma, cmd_va, cmd_len, buf_type,
+		buf_addr_type, workload, cmd_name),
+
+	TP_STRUCT__entry(
+		__field(u8, vgpu_id)
+		__field(u8, ring_id)
+		__field(u32, ip_gma)
+		__field(u32, buf_type)
+		__field(u32, buf_addr_type)
+		__field(u32, cmd_len)
+		__field(void*, workload)
+		__dynamic_array(u32, raw_cmd, cmd_len)
+		__array(char, cmd_name, GVT_CMD_STR_LEN)
+	),
+
+	TP_fast_assign(
+		__entry->vgpu_id = vgpu_id;
+		__entry->ring_id = ring_id;
+		__entry->ip_gma = ip_gma;
+		__entry->buf_type = buf_type;
+		__entry->buf_addr_type = buf_addr_type;
+		__entry->cmd_len = cmd_len;
+		__entry->workload = workload;
+		snprintf(__entry->cmd_name, GVT_CMD_STR_LEN, "%s", cmd_name);
+		memcpy(__get_dynamic_array(raw_cmd), cmd_va, cmd_len * sizeof(*cmd_va));
+	),
+
+
+	TP_printk("vgpu%d ring %d: address_type %u, buf_type %u, ip_gma %08x,cmd (name=%s,len=%u,raw cmd=%s), workload=%p\n",
+		__entry->vgpu_id,
+		__entry->ring_id,
+		__entry->buf_addr_type,
+		__entry->buf_type,
+		__entry->ip_gma,
+		__entry->cmd_name,
+		__entry->cmd_len,
+		__print_array(__get_dynamic_array(raw_cmd),
+			__entry->cmd_len, 4),
+		__entry->workload)
+);
+
+#define GVT_TEMP_STR_LEN 10
+TRACE_EVENT(write_ir,
+	TP_PROTO(int id, char *reg_name, unsigned int reg, unsigned int new_val,
+		 unsigned int old_val, bool changed),
+
+	TP_ARGS(id, reg_name, reg, new_val, old_val, changed),
+
+	TP_STRUCT__entry(
+		__field(int, id)
+		__array(char, buf, GVT_TEMP_STR_LEN)
+		__field(unsigned int, reg)
+		__field(unsigned int, new_val)
+		__field(unsigned int, old_val)
+		__field(bool, changed)
+	),
+
+	TP_fast_assign(
+		__entry->id = id;
+		snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", reg_name);
+		__entry->reg = reg;
+		__entry->new_val = new_val;
+		__entry->old_val = old_val;
+		__entry->changed = changed;
+	),
+
+	TP_printk("VM%u write [%s] %x, new %08x, old %08x, changed %08x\n",
+		  __entry->id, __entry->buf, __entry->reg, __entry->new_val,
+		  __entry->old_val, __entry->changed)
+);
+
+TRACE_EVENT(propagate_event,
+	TP_PROTO(int id, const char *irq_name, int bit),
+
+	TP_ARGS(id, irq_name, bit),
+
+	TP_STRUCT__entry(
+		__field(int, id)
+		__array(char, buf, GVT_TEMP_STR_LEN)
+		__field(int, bit)
+	),
+
+	TP_fast_assign(
+		__entry->id = id;
+		snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", irq_name);
+		__entry->bit = bit;
+	),
+
+	TP_printk("Set bit (%d) for (%s) for vgpu (%d)\n",
+		  __entry->bit, __entry->buf, __entry->id)
+);
+
+TRACE_EVENT(inject_msi,
+	TP_PROTO(int id, unsigned int address, unsigned int data),
+
+	TP_ARGS(id, address, data),
+
+	TP_STRUCT__entry(
+		__field(int, id)
+		__field(unsigned int, address)
+		__field(unsigned int, data)
+	),
+
+	TP_fast_assign(
+		__entry->id = id;
+		__entry->address = address;
+		__entry->data = data;
+	),
+
+	TP_printk("vgpu%d:inject msi address %x data %x\n",
+		  __entry->id, __entry->address, __entry->data)
+);
+
+TRACE_EVENT(render_mmio,
+	TP_PROTO(int old_id, int new_id, char *action, unsigned int reg,
+		 unsigned int old_val, unsigned int new_val),
+
+	TP_ARGS(old_id, new_id, action, reg, old_val, new_val),
+
+	TP_STRUCT__entry(
+		__field(int, old_id)
+		__field(int, new_id)
+		__array(char, buf, GVT_TEMP_STR_LEN)
+		__field(unsigned int, reg)
+		__field(unsigned int, old_val)
+		__field(unsigned int, new_val)
+	),
+
+	TP_fast_assign(
+		__entry->old_id = old_id;
+		__entry->new_id = new_id;
+		snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action);
+		__entry->reg = reg;
+		__entry->old_val = old_val;
+		__entry->new_val = new_val;
+	),
+
+	TP_printk("VM%u -> VM%u %s reg %x, old %08x new %08x\n",
+		  __entry->old_id, __entry->new_id,
+		  __entry->buf, __entry->reg,
+		  __entry->old_val, __entry->new_val)
+);
+
+#endif /* _GVT_TRACE_H_ */
+
+/* This part must be out of protection */
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH ../../drivers/gpu/drm/i915/gvt
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/i915/gvt/trace_points.c b/drivers/gpu/drm/i915/gvt/trace_points.c
new file mode 100644
index 0000000000..fe552e877e
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/trace_points.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Jike Song <jike.song@intel.com>
+ *
+ * Contributors:
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *
+ */
+
+#ifndef __CHECKER__
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+#endif
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
new file mode 100644
index 0000000000..08ad1bd651
--- /dev/null
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -0,0 +1,510 @@
+/*
+ * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Eddie Dong <eddie.dong@intel.com>
+ *    Kevin Tian <kevin.tian@intel.com>
+ *
+ * Contributors:
+ *    Ping Gao <ping.a.gao@intel.com>
+ *    Zhi Wang <zhi.a.wang@intel.com>
+ *    Bing Niu <bing.niu@intel.com>
+ *
+ */
+
+#include "i915_drv.h"
+#include "gvt.h"
+#include "i915_pvinfo.h"
+
+void populate_pvinfo_page(struct intel_vgpu *vgpu)
+{
+	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
+	/* setup the ballooning information */
+	vgpu_vreg64_t(vgpu, vgtif_reg(magic)) = VGT_MAGIC;
+	vgpu_vreg_t(vgpu, vgtif_reg(version_major)) = 1;
+	vgpu_vreg_t(vgpu, vgtif_reg(version_minor)) = 0;
+	vgpu_vreg_t(vgpu, vgtif_reg(display_ready)) = 0;
+	vgpu_vreg_t(vgpu, vgtif_reg(vgt_id)) = vgpu->id;
+
+	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_PPGTT;
+	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION;
+	vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HUGE_GTT;
+
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) =
+		vgpu_aperture_gmadr_base(vgpu);
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) =
+		vgpu_aperture_sz(vgpu);
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) =
+		vgpu_hidden_gmadr_base(vgpu);
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) =
+		vgpu_hidden_sz(vgpu);
+
+	vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu);
+
+	vgpu_vreg_t(vgpu, vgtif_reg(cursor_x_hot)) = UINT_MAX;
+	vgpu_vreg_t(vgpu, vgtif_reg(cursor_y_hot)) = UINT_MAX;
+
+	gvt_dbg_core("Populate PVINFO PAGE for vGPU %d\n", vgpu->id);
+	gvt_dbg_core("aperture base [GMADR] 0x%llx size 0x%llx\n",
+		vgpu_aperture_gmadr_base(vgpu), vgpu_aperture_sz(vgpu));
+	gvt_dbg_core("hidden base [GMADR] 0x%llx size=0x%llx\n",
+		vgpu_hidden_gmadr_base(vgpu), vgpu_hidden_sz(vgpu));
+	gvt_dbg_core("fence size %d\n", vgpu_fence_sz(vgpu));
+
+	drm_WARN_ON(&i915->drm, sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
+}
+
+/*
+ * vGPU type name is defined as GVTg_Vx_y which contains the physical GPU
+ * generation type (e.g V4 as BDW server, V5 as SKL server).
+ *
+ * Depening on the physical SKU resource, we might see vGPU types like
+ * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create different types of
+ * vGPU on same physical GPU depending on available resource. Each vGPU
+ * type will have a different number of avail_instance to indicate how
+ * many vGPU instance can be created for this type.
+ */
+#define VGPU_MAX_WEIGHT 16
+#define VGPU_WEIGHT(vgpu_num)	\
+	(VGPU_MAX_WEIGHT / (vgpu_num))
+
+static const struct intel_vgpu_config intel_vgpu_configs[] = {
+	{ MB_TO_BYTES(64), MB_TO_BYTES(384), 4, VGPU_WEIGHT(8), GVT_EDID_1024_768, "8" },
+	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, VGPU_WEIGHT(4), GVT_EDID_1920_1200, "4" },
+	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, VGPU_WEIGHT(2), GVT_EDID_1920_1200, "2" },
+	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, VGPU_WEIGHT(1), GVT_EDID_1920_1200, "1" },
+};
+
+/**
+ * intel_gvt_init_vgpu_types - initialize vGPU type list
+ * @gvt : GVT device
+ *
+ * Initialize vGPU type list based on available resource.
+ *
+ */
+int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
+{
+	unsigned int low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
+	unsigned int high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
+	unsigned int num_types = ARRAY_SIZE(intel_vgpu_configs);
+	unsigned int i;
+
+	gvt->types = kcalloc(num_types, sizeof(struct intel_vgpu_type),
+			     GFP_KERNEL);
+	if (!gvt->types)
+		return -ENOMEM;
+
+	gvt->mdev_types = kcalloc(num_types, sizeof(*gvt->mdev_types),
+			     GFP_KERNEL);
+	if (!gvt->mdev_types)
+		goto out_free_types;
+
+	for (i = 0; i < num_types; ++i) {
+		const struct intel_vgpu_config *conf = &intel_vgpu_configs[i];
+
+		if (low_avail / conf->low_mm == 0)
+			break;
+		if (conf->weight < 1 || conf->weight > VGPU_MAX_WEIGHT)
+			goto out_free_mdev_types;
+
+		sprintf(gvt->types[i].name, "GVTg_V%u_%s",
+			GRAPHICS_VER(gvt->gt->i915) == 8 ? 4 : 5, conf->name);
+		gvt->types[i].conf = conf;
+
+		gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u weight %u res %s\n",
+			     i, gvt->types[i].name,
+			     min(low_avail / conf->low_mm,
+				 high_avail / conf->high_mm),
+			     conf->low_mm, conf->high_mm, conf->fence,
+			     conf->weight, vgpu_edid_str(conf->edid));
+
+		gvt->mdev_types[i] = &gvt->types[i].type;
+		gvt->mdev_types[i]->sysfs_name = gvt->types[i].name;
+	}
+
+	gvt->num_types = i;
+	return 0;
+
+out_free_mdev_types:
+	kfree(gvt->mdev_types);
+out_free_types:
+	kfree(gvt->types);
+	return -EINVAL;
+}
+
+void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt)
+{
+	kfree(gvt->mdev_types);
+	kfree(gvt->types);
+}
+
+/**
+ * intel_gvt_activate_vgpu - activate a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to activate a virtual GPU.
+ *
+ */
+void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
+{
+	set_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status);
+}
+
+/**
+ * intel_gvt_deactivate_vgpu - deactivate a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to deactivate a virtual GPU.
+ * The virtual GPU will be stopped.
+ *
+ */
+void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu)
+{
+	mutex_lock(&vgpu->vgpu_lock);
+
+	clear_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status);
+
+	if (atomic_read(&vgpu->submission.running_workload_num)) {
+		mutex_unlock(&vgpu->vgpu_lock);
+		intel_gvt_wait_vgpu_idle(vgpu);
+		mutex_lock(&vgpu->vgpu_lock);
+	}
+
+	intel_vgpu_stop_schedule(vgpu);
+
+	mutex_unlock(&vgpu->vgpu_lock);
+}
+
+/**
+ * intel_gvt_release_vgpu - release a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to release a virtual GPU.
+ * The virtual GPU will be stopped and all runtime information will be
+ * destroyed.
+ *
+ */
+void intel_gvt_release_vgpu(struct intel_vgpu *vgpu)
+{
+	intel_gvt_deactivate_vgpu(vgpu);
+
+	mutex_lock(&vgpu->vgpu_lock);
+	vgpu->d3_entered = false;
+	intel_vgpu_clean_workloads(vgpu, ALL_ENGINES);
+	intel_vgpu_dmabuf_cleanup(vgpu);
+	mutex_unlock(&vgpu->vgpu_lock);
+}
+
+/**
+ * intel_gvt_destroy_vgpu - destroy a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to destroy a virtual GPU.
+ *
+ */
+void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *i915 = gvt->gt->i915;
+
+	drm_WARN(&i915->drm, test_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status),
+		 "vGPU is still active!\n");
+
+	/*
+	 * remove idr first so later clean can judge if need to stop
+	 * service if no active vgpu.
+	 */
+	mutex_lock(&gvt->lock);
+	idr_remove(&gvt->vgpu_idr, vgpu->id);
+	mutex_unlock(&gvt->lock);
+
+	mutex_lock(&vgpu->vgpu_lock);
+	intel_gvt_debugfs_remove_vgpu(vgpu);
+	intel_vgpu_clean_sched_policy(vgpu);
+	intel_vgpu_clean_submission(vgpu);
+	intel_vgpu_clean_display(vgpu);
+	intel_vgpu_clean_opregion(vgpu);
+	intel_vgpu_reset_ggtt(vgpu, true);
+	intel_vgpu_clean_gtt(vgpu);
+	intel_vgpu_detach_regions(vgpu);
+	intel_vgpu_free_resource(vgpu);
+	intel_vgpu_clean_mmio(vgpu);
+	intel_vgpu_dmabuf_cleanup(vgpu);
+	mutex_unlock(&vgpu->vgpu_lock);
+}
+
+#define IDLE_VGPU_IDR 0
+
+/**
+ * intel_gvt_create_idle_vgpu - create an idle virtual GPU
+ * @gvt: GVT device
+ *
+ * This function is called when user wants to create an idle virtual GPU.
+ *
+ * Returns:
+ * pointer to intel_vgpu, error pointer if failed.
+ */
+struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt)
+{
+	struct intel_vgpu *vgpu;
+	enum intel_engine_id i;
+	int ret;
+
+	vgpu = vzalloc(sizeof(*vgpu));
+	if (!vgpu)
+		return ERR_PTR(-ENOMEM);
+
+	vgpu->id = IDLE_VGPU_IDR;
+	vgpu->gvt = gvt;
+	mutex_init(&vgpu->vgpu_lock);
+
+	for (i = 0; i < I915_NUM_ENGINES; i++)
+		INIT_LIST_HEAD(&vgpu->submission.workload_q_head[i]);
+
+	ret = intel_vgpu_init_sched_policy(vgpu);
+	if (ret)
+		goto out_free_vgpu;
+
+	clear_bit(INTEL_VGPU_STATUS_ACTIVE, vgpu->status);
+	return vgpu;
+
+out_free_vgpu:
+	vfree(vgpu);
+	return ERR_PTR(ret);
+}
+
+/**
+ * intel_gvt_destroy_idle_vgpu - destroy an idle virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to destroy an idle virtual GPU.
+ *
+ */
+void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu)
+{
+	mutex_lock(&vgpu->vgpu_lock);
+	intel_vgpu_clean_sched_policy(vgpu);
+	mutex_unlock(&vgpu->vgpu_lock);
+
+	vfree(vgpu);
+}
+
+int intel_gvt_create_vgpu(struct intel_vgpu *vgpu,
+		const struct intel_vgpu_config *conf)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *dev_priv = gvt->gt->i915;
+	int ret;
+
+	gvt_dbg_core("low %u MB high %u MB fence %u\n",
+			BYTES_TO_MB(conf->low_mm), BYTES_TO_MB(conf->high_mm),
+			conf->fence);
+
+	mutex_lock(&gvt->lock);
+	ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU,
+		GFP_KERNEL);
+	if (ret < 0)
+		goto out_unlock;
+
+	vgpu->id = ret;
+	vgpu->sched_ctl.weight = conf->weight;
+	mutex_init(&vgpu->vgpu_lock);
+	mutex_init(&vgpu->dmabuf_lock);
+	INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head);
+	INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL);
+	idr_init_base(&vgpu->object_idr, 1);
+	intel_vgpu_init_cfg_space(vgpu, 1);
+	vgpu->d3_entered = false;
+
+	ret = intel_vgpu_init_mmio(vgpu);
+	if (ret)
+		goto out_clean_idr;
+
+	ret = intel_vgpu_alloc_resource(vgpu, conf);
+	if (ret)
+		goto out_clean_vgpu_mmio;
+
+	populate_pvinfo_page(vgpu);
+
+	ret = intel_vgpu_init_gtt(vgpu);
+	if (ret)
+		goto out_clean_vgpu_resource;
+
+	ret = intel_vgpu_init_opregion(vgpu);
+	if (ret)
+		goto out_clean_gtt;
+
+	ret = intel_vgpu_init_display(vgpu, conf->edid);
+	if (ret)
+		goto out_clean_opregion;
+
+	ret = intel_vgpu_setup_submission(vgpu);
+	if (ret)
+		goto out_clean_display;
+
+	ret = intel_vgpu_init_sched_policy(vgpu);
+	if (ret)
+		goto out_clean_submission;
+
+	intel_gvt_debugfs_add_vgpu(vgpu);
+
+	ret = intel_gvt_set_opregion(vgpu);
+	if (ret)
+		goto out_clean_sched_policy;
+
+	if (IS_BROADWELL(dev_priv) || IS_BROXTON(dev_priv))
+		ret = intel_gvt_set_edid(vgpu, PORT_B);
+	else
+		ret = intel_gvt_set_edid(vgpu, PORT_D);
+	if (ret)
+		goto out_clean_sched_policy;
+
+	intel_gvt_update_reg_whitelist(vgpu);
+	mutex_unlock(&gvt->lock);
+	return 0;
+
+out_clean_sched_policy:
+	intel_vgpu_clean_sched_policy(vgpu);
+out_clean_submission:
+	intel_vgpu_clean_submission(vgpu);
+out_clean_display:
+	intel_vgpu_clean_display(vgpu);
+out_clean_opregion:
+	intel_vgpu_clean_opregion(vgpu);
+out_clean_gtt:
+	intel_vgpu_clean_gtt(vgpu);
+out_clean_vgpu_resource:
+	intel_vgpu_free_resource(vgpu);
+out_clean_vgpu_mmio:
+	intel_vgpu_clean_mmio(vgpu);
+out_clean_idr:
+	idr_remove(&gvt->vgpu_idr, vgpu->id);
+out_unlock:
+	mutex_unlock(&gvt->lock);
+	return ret;
+}
+
+/**
+ * intel_gvt_reset_vgpu_locked - reset a virtual GPU by DMLR or GT reset
+ * @vgpu: virtual GPU
+ * @dmlr: vGPU Device Model Level Reset or GT Reset
+ * @engine_mask: engines to reset for GT reset
+ *
+ * This function is called when user wants to reset a virtual GPU through
+ * device model reset or GT reset. The caller should hold the vgpu lock.
+ *
+ * vGPU Device Model Level Reset (DMLR) simulates the PCI level reset to reset
+ * the whole vGPU to default state as when it is created. This vGPU function
+ * is required both for functionary and security concerns.The ultimate goal
+ * of vGPU FLR is that reuse a vGPU instance by virtual machines. When we
+ * assign a vGPU to a virtual machine we must isse such reset first.
+ *
+ * Full GT Reset and Per-Engine GT Reset are soft reset flow for GPU engines
+ * (Render, Blitter, Video, Video Enhancement). It is defined by GPU Spec.
+ * Unlike the FLR, GT reset only reset particular resource of a vGPU per
+ * the reset request. Guest driver can issue a GT reset by programming the
+ * virtual GDRST register to reset specific virtual GPU engine or all
+ * engines.
+ *
+ * The parameter dev_level is to identify if we will do DMLR or GT reset.
+ * The parameter engine_mask is to specific the engines that need to be
+ * resetted. If value ALL_ENGINES is given for engine_mask, it means
+ * the caller requests a full GT reset that we will reset all virtual
+ * GPU engines. For FLR, engine_mask is ignored.
+ */
+void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
+				 intel_engine_mask_t engine_mask)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	intel_engine_mask_t resetting_eng = dmlr ? ALL_ENGINES : engine_mask;
+
+	gvt_dbg_core("------------------------------------------\n");
+	gvt_dbg_core("resseting vgpu%d, dmlr %d, engine_mask %08x\n",
+		     vgpu->id, dmlr, engine_mask);
+
+	vgpu->resetting_eng = resetting_eng;
+
+	intel_vgpu_stop_schedule(vgpu);
+	/*
+	 * The current_vgpu will set to NULL after stopping the
+	 * scheduler when the reset is triggered by current vgpu.
+	 */
+	if (scheduler->current_vgpu == NULL) {
+		mutex_unlock(&vgpu->vgpu_lock);
+		intel_gvt_wait_vgpu_idle(vgpu);
+		mutex_lock(&vgpu->vgpu_lock);
+	}
+
+	intel_vgpu_reset_submission(vgpu, resetting_eng);
+	/* full GPU reset or device model level reset */
+	if (engine_mask == ALL_ENGINES || dmlr) {
+		intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
+		if (engine_mask == ALL_ENGINES)
+			intel_vgpu_invalidate_ppgtt(vgpu);
+		/*fence will not be reset during virtual reset */
+		if (dmlr) {
+			if(!vgpu->d3_entered) {
+				intel_vgpu_invalidate_ppgtt(vgpu);
+				intel_vgpu_destroy_all_ppgtt_mm(vgpu);
+			}
+			intel_vgpu_reset_ggtt(vgpu, true);
+			intel_vgpu_reset_resource(vgpu);
+		}
+
+		intel_vgpu_reset_mmio(vgpu, dmlr);
+		populate_pvinfo_page(vgpu);
+
+		if (dmlr) {
+			intel_vgpu_reset_display(vgpu);
+			intel_vgpu_reset_cfg_space(vgpu);
+			/* only reset the failsafe mode when dmlr reset */
+			vgpu->failsafe = false;
+			/*
+			 * PCI_D0 is set before dmlr, so reset d3_entered here
+			 * after done using.
+			 */
+			if(vgpu->d3_entered)
+				vgpu->d3_entered = false;
+			else
+				vgpu->pv_notified = false;
+		}
+	}
+
+	vgpu->resetting_eng = 0;
+	gvt_dbg_core("reset vgpu%d done\n", vgpu->id);
+	gvt_dbg_core("------------------------------------------\n");
+}
+
+/**
+ * intel_gvt_reset_vgpu - reset a virtual GPU (Function Level)
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to reset a virtual GPU.
+ *
+ */
+void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu)
+{
+	mutex_lock(&vgpu->vgpu_lock);
+	intel_gvt_reset_vgpu_locked(vgpu, true, 0);
+	mutex_unlock(&vgpu->vgpu_lock);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-11 08:27:49 +0000
commit	ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree	b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/gpu/drm/i915/gvt
parent	Initial commit. (diff)
download	linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip