author     Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-11 08:27:49 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>    2024-04-11 08:27:49 +0000
commit     ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch)
tree       b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/gpu/drm/i915/gem
parent     Initial commit. (diff)
Adding upstream version 6.6.15.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/i915/gem')
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_busy.c | 164
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_clflush.c | 139
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_clflush.h | 20
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.c | 2599
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context.h | 248
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_context_types.h | 419
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_create.c | 498
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_create.h | 17
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c | 348
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h | 18
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_domain.c | 781
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_domain.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 3652
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_internal.c | 199
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_internal.h | 23
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ioctls.h | 52
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 143
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 38
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.c | 1105
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_mman.h | 33
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.c | 967
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object.h | 957
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_object_types.h | 735
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pages.c | 675
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_phys.c | 260
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pm.c | 256
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_pm.h | 26
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.c | 208
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_region.h | 67
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shmem.c | 738
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 585
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_shrinker.h | 33
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 1012
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_stolen.h | 52
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_throttle.c | 102
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_tiling.c | 472
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_tiling.h | 20
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.c | 1384
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm.h | 107
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c | 757
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h | 42
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c | 232
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h | 26
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 594
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_userptr.h | 14
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_wait.c | 295
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gemfs.c | 60
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gemfs.h | 15
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c | 131
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h | 33
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 2034
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c | 751
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c | 436
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c | 1915
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c | 557
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c | 528
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c | 1850
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c | 99
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c | 87
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c | 157
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h | 46
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_context.c | 180
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_context.h | 33
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c | 133
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h | 22
-rw-r--r--  drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h | 16
66 files changed, 30210 insertions, 0 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_busy.c b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
new file mode 100644
index 0000000000..ddda468241
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_busy.c
@@ -0,0 +1,164 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/dma-fence-array.h>
+
+#include "gt/intel_engine.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+static __always_inline u32 __busy_read_flag(u16 id)
+{
+ if (id == (u16)I915_ENGINE_CLASS_INVALID)
+ return 0xffff0000u;
+
+ GEM_BUG_ON(id >= 16);
+ return 0x10000u << id;
+}
+
+static __always_inline u32 __busy_write_id(u16 id)
+{
+ /*
+ * The uABI guarantees an active writer is also amongst the read
+ * engines. This would be true if we accessed the activity tracking
+ * under the lock, but as we perform the lookup of the object and
+ * its activity locklessly we can not guarantee that the last_write
+ * being active implies that we have set the same engine flag from
+ * last_read - hence we always set both read and write busy for
+ * last_write.
+ */
+ if (id == (u16)I915_ENGINE_CLASS_INVALID)
+ return 0xffffffffu;
+
+ return (id + 1) | __busy_read_flag(id);
+}
+
+static __always_inline unsigned int
+__busy_set_if_active(struct dma_fence *fence, u32 (*flag)(u16 id))
+{
+ const struct i915_request *rq;
+
+ /*
+ * We have to check the current hw status of the fence as the uABI
+ * guarantees forward progress. We could rely on the idle worker
+ * to eventually flush us, but to minimise latency just ask the
+ * hardware.
+ *
+ * Note we only report on the status of native fences and we currently
+ * have two native fences:
+ *
+ * 1. A composite fence (dma_fence_array) constructed of i915 requests
+ * created during a parallel submission. In this case we deconstruct the
+ * composite fence into individual i915 requests and check the status of
+ * each request.
+ *
+ * 2. A single i915 request.
+ */
+ if (dma_fence_is_array(fence)) {
+ struct dma_fence_array *array = to_dma_fence_array(fence);
+ struct dma_fence **child = array->fences;
+ unsigned int nchild = array->num_fences;
+
+ do {
+ struct dma_fence *current_fence = *child++;
+
+ /* Not an i915 fence, can't be busy per above */
+ if (!dma_fence_is_i915(current_fence) ||
+ !test_bit(I915_FENCE_FLAG_COMPOSITE,
+ &current_fence->flags)) {
+ return 0;
+ }
+
+ rq = to_request(current_fence);
+ if (!i915_request_completed(rq))
+ return flag(rq->engine->uabi_class);
+ } while (--nchild);
+
+ /* All requests in array complete, not busy */
+ return 0;
+ } else {
+ if (!dma_fence_is_i915(fence))
+ return 0;
+
+ rq = to_request(fence);
+ if (i915_request_completed(rq))
+ return 0;
+
+ /* Beware type-expansion follies! */
+ BUILD_BUG_ON(!typecheck(u16, rq->engine->uabi_class));
+ return flag(rq->engine->uabi_class);
+ }
+}
+
+static __always_inline unsigned int
+busy_check_reader(struct dma_fence *fence)
+{
+ return __busy_set_if_active(fence, __busy_read_flag);
+}
+
+static __always_inline unsigned int
+busy_check_writer(struct dma_fence *fence)
+{
+ if (!fence)
+ return 0;
+
+ return __busy_set_if_active(fence, __busy_write_id);
+}
+
+int
+i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_busy *args = data;
+ struct drm_i915_gem_object *obj;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ int err;
+
+ err = -ENOENT;
+ rcu_read_lock();
+ obj = i915_gem_object_lookup_rcu(file, args->handle);
+ if (!obj)
+ goto out;
+
+ /*
+ * A discrepancy here is that we do not report the status of
+ * non-i915 fences, i.e. even though we may report the object as idle,
+ * a call to set-domain may still stall waiting for foreign rendering.
+ * This also means that wait-ioctl may report an object as busy,
+ * where busy-ioctl considers it idle.
+ *
+ * We trade the ability to warn of foreign fences to report on which
+ * i915 engines are active for the object.
+ *
+ * Alternatively, we can trade that extra information on read/write
+ * activity with
+ * args->busy =
+ * !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ);
+ * to report the overall busyness. This is what the wait-ioctl does.
+ *
+ */
+ args->busy = 0;
+ dma_resv_iter_begin(&cursor, obj->base.resv, DMA_RESV_USAGE_READ);
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ if (dma_resv_iter_is_restarted(&cursor))
+ args->busy = 0;
+
+ if (dma_resv_iter_usage(&cursor) <= DMA_RESV_USAGE_WRITE)
+ /* Translate the write fences to the READ *and* WRITE engine */
+ args->busy |= busy_check_writer(fence);
+ else
+ /* Translate read fences to READ set of engines */
+ args->busy |= busy_check_reader(fence);
+ }
+ dma_resv_iter_end(&cursor);
+
+ err = 0;
+out:
+ rcu_read_unlock();
+ return err;
+}
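
For illustration only (not part of this patch): user space can decode the busy-ioctl result using the encoding described above, where the low 16 bits hold the active writer's engine class plus one and the high 16 bits hold a bitmask of reading engine classes. A minimal hypothetical sketch:

/* Hypothetical helper: decode drm_i915_gem_busy.busy as produced above. */
#include <stdint.h>
#include <stdio.h>

static void decode_busy(uint32_t busy)
{
        uint16_t write_id = busy & 0xffff;      /* engine class + 1, 0 means no writer */
        uint16_t read_mask = busy >> 16;        /* one bit per reading engine class */

        if (write_id)
                printf("busy for write on engine class %u\n", write_id - 1);
        while (read_mask) {
                unsigned int class = __builtin_ctz(read_mask);

                printf("busy for read on engine class %u\n", class);
                read_mask &= read_mask - 1;
        }
}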
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
new file mode 100644
index 0000000000..385ffc575b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -0,0 +1,139 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <drm/drm_cache.h>
+
+#include "display/intel_frontbuffer.h"
+
+#include "i915_config.h"
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_sw_fence_work.h"
+#include "i915_trace.h"
+
+struct clflush {
+ struct dma_fence_work base;
+ struct drm_i915_gem_object *obj;
+};
+
+static void __do_clflush(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ drm_clflush_sg(obj->mm.pages);
+
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
+}
+
+static void clflush_work(struct dma_fence_work *base)
+{
+ struct clflush *clflush = container_of(base, typeof(*clflush), base);
+
+ __do_clflush(clflush->obj);
+}
+
+static void clflush_release(struct dma_fence_work *base)
+{
+ struct clflush *clflush = container_of(base, typeof(*clflush), base);
+
+ i915_gem_object_unpin_pages(clflush->obj);
+ i915_gem_object_put(clflush->obj);
+}
+
+static const struct dma_fence_work_ops clflush_ops = {
+ .name = "clflush",
+ .work = clflush_work,
+ .release = clflush_release,
+};
+
+static struct clflush *clflush_work_create(struct drm_i915_gem_object *obj)
+{
+ struct clflush *clflush;
+
+ GEM_BUG_ON(!obj->cache_dirty);
+
+ clflush = kmalloc(sizeof(*clflush), GFP_KERNEL);
+ if (!clflush)
+ return NULL;
+
+ if (__i915_gem_object_get_pages(obj) < 0) {
+ kfree(clflush);
+ return NULL;
+ }
+
+ dma_fence_work_init(&clflush->base, &clflush_ops);
+ clflush->obj = i915_gem_object_get(obj); /* obj <-> clflush cycle */
+
+ return clflush;
+}
+
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+ unsigned int flags)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct clflush *clflush;
+
+ assert_object_held(obj);
+
+ if (IS_DGFX(i915)) {
+ WARN_ON_ONCE(obj->cache_dirty);
+ return false;
+ }
+
+ /*
+ * Stolen memory is always coherent with the GPU as it is explicitly
+ * marked as wc by the system, or the system is cache-coherent.
+ * Similarly, we only access struct pages through the CPU cache, so
+ * anything not backed by physical memory we consider to be always
+ * coherent and not need clflushing.
+ */
+ if (!i915_gem_object_has_struct_page(obj)) {
+ obj->cache_dirty = false;
+ return false;
+ }
+
+ /* If the GPU is snooping the contents of the CPU cache,
+ * we do not need to manually clear the CPU cache lines. However,
+ * the caches are only snooped when the render cache is
+ * flushed/invalidated. As we always have to emit invalidations
+ * and flushes when moving into and out of the RENDER domain, correct
+ * snooping behaviour occurs naturally as the result of our domain
+ * tracking.
+ */
+ if (!(flags & I915_CLFLUSH_FORCE) &&
+ obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
+ return false;
+
+ trace_i915_gem_object_clflush(obj);
+
+ clflush = NULL;
+ if (!(flags & I915_CLFLUSH_SYNC) &&
+ dma_resv_reserve_fences(obj->base.resv, 1) == 0)
+ clflush = clflush_work_create(obj);
+ if (clflush) {
+ i915_sw_fence_await_reservation(&clflush->base.chain,
+ obj->base.resv, true,
+ i915_fence_timeout(i915),
+ I915_FENCE_GFP);
+ dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
+ DMA_RESV_USAGE_KERNEL);
+ dma_fence_work_commit(&clflush->base);
+ /*
+ * We must have successfully populated the pages (since we are
+ * holding a pin on the pages as per the flush worker) to reach
+ * this point, which must mean we have already done the required
+ * flush-on-acquire, hence resetting cache_dirty here should be
+ * safe.
+ */
+ obj->cache_dirty = false;
+ } else if (obj->mm.pages) {
+ __do_clflush(obj);
+ obj->cache_dirty = false;
+ } else {
+ GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
+ }
+
+ return true;
+}
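
Illustrative sketch (hypothetical caller, not part of this patch): with the object lock held, i915_gem_clflush_object() either queues the flush as asynchronous dma_fence_work or, with I915_CLFLUSH_SYNC, performs it inline.

/* Hypothetical caller: flush CPU caches before reading the object's backing
 * store. Assumes the usual i915 object locking helpers. */
static int example_prepare_cpu_read(struct drm_i915_gem_object *obj)
{
        int err;

        err = i915_gem_object_lock_interruptible(obj, NULL);
        if (err)
                return err;

        /* Queue an asynchronous flush; I915_CLFLUSH_SYNC would do it inline. */
        i915_gem_clflush_object(obj, 0);

        i915_gem_object_unlock(obj);
        return 0;
}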
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.h b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
new file mode 100644
index 0000000000..e6c3829731
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.h
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CLFLUSH_H__
+#define __I915_GEM_CLFLUSH_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+
+bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
+ unsigned int flags);
+#define I915_CLFLUSH_FORCE BIT(0)
+#define I915_CLFLUSH_SYNC BIT(1)
+
+#endif /* __I915_GEM_CLFLUSH_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
new file mode 100644
index 0000000000..e38f06a6e5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -0,0 +1,2599 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2011-2012 Intel Corporation
+ */
+
+/*
+ * This file implements HW context support. On gen5+ a HW context consists of an
+ * opaque GPU object which is referenced at times of context saves and restores.
+ * With RC6 enabled, the context is also referenced as the GPU enters and exits
+ * RC6 (the GPU has its own internal power context, except on gen5). Though
+ * something like a context does exist for the media ring, the code only
+ * supports contexts for the render ring.
+ *
+ * In software, there is a distinction between contexts created by the user,
+ * and the default HW context. The default HW context is used by GPU clients
+ * that do not request setup of their own hardware context. The default
+ * context's state is never restored to help prevent programming errors. This
+ * would happen if a client ran and piggy-backed off another client's GPU state.
+ * The default context only exists to give the GPU some offset to load as the
+ * current to invoke a save of the context we actually care about. In fact, the
+ * code could likely be constructed, albeit in a more complicated fashion, to
+ * never use the default context, though that limits the driver's ability to
+ * swap out, and/or destroy other contexts.
+ *
+ * All other contexts are created as a request by the GPU client. These contexts
+ * store GPU state, and thus allow GPU clients to not re-emit state (and
+ * potentially query certain state) at any time. The kernel driver makes
+ * certain that the appropriate commands are inserted.
+ *
+ * The context life cycle is semi-complicated in that context BOs may live
+ * longer than the context itself because of the way the hardware and object
+ * tracking work. Below is a very crude representation of the state machine
+ * describing the context life.
+ * refcount pincount active
+ * S0: initial state 0 0 0
+ * S1: context created 1 0 0
+ * S2: context is currently running 2 1 X
+ * S3: GPU referenced, but not current 2 0 1
+ * S4: context is current, but destroyed 1 1 0
+ * S5: like S3, but destroyed 1 0 1
+ *
+ * The most common (but not all) transitions:
+ * S0->S1: client creates a context
+ * S1->S2: client submits execbuf with context
+ * S2->S3: another client submits an execbuf with the context
+ * S3->S1: context object was retired
+ * S3->S2: client submits another execbuf
+ * S2->S4: context destroy called with current context
+ * S3->S5->S0: destroy path
+ * S4->S5->S0: destroy path on current context
+ *
+ * There are two confusing terms used above:
+ * The "current context" means the context which is currently running on the
+ * GPU. The GPU has loaded its state already and has stored away the gtt
+ * offset of the BO. The GPU is not actively referencing the data at this
+ * offset, but it will on the next context switch. The only way to avoid this
+ * is to do a GPU reset.
+ *
+ * An "active context' is one which was previously the "current context" and is
+ * on the active list waiting for the next context switch to occur. Until this
+ * happens, the object must remain at the same gtt offset. It is therefore
+ * possible to destroy a context while it is still active.
+ *
+ */
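
Illustrative sketch (user space, not part of this patch): the S0->S1 transition described above corresponds to the context-create ioctl.

/* Hypothetical user-space helper built on libdrm. */
#include <stdint.h>
#include <xf86drm.h>
#include <i915_drm.h>

static uint32_t example_create_context(int fd)
{
        struct drm_i915_gem_context_create create = { };

        if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create))
                return 0;       /* 0 is the reserved default context id */

        return create.ctx_id;
}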
+
+#include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/nospec.h>
+
+#include <drm/drm_cache.h>
+#include <drm/drm_syncobj.h>
+
+#include "gt/gen6_ppgtt.h"
+#include "gt/intel_context.h"
+#include "gt/intel_context_param.h"
+#include "gt/intel_engine_heartbeat.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_ring.h"
+
+#include "pxp/intel_pxp.h"
+
+#include "i915_file_private.h"
+#include "i915_gem_context.h"
+#include "i915_trace.h"
+#include "i915_user_extensions.h"
+
+#define ALL_L3_SLICES(dev) ((1 << NUM_L3_SLICES(dev)) - 1)
+
+static struct kmem_cache *slab_luts;
+
+struct i915_lut_handle *i915_lut_handle_alloc(void)
+{
+ return kmem_cache_alloc(slab_luts, GFP_KERNEL);
+}
+
+void i915_lut_handle_free(struct i915_lut_handle *lut)
+{
+ return kmem_cache_free(slab_luts, lut);
+}
+
+static void lut_close(struct i915_gem_context *ctx)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ mutex_lock(&ctx->lut_mutex);
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
+ struct i915_vma *vma = rcu_dereference_raw(*slot);
+ struct drm_i915_gem_object *obj = vma->obj;
+ struct i915_lut_handle *lut;
+
+ if (!kref_get_unless_zero(&obj->base.refcount))
+ continue;
+
+ spin_lock(&obj->lut_lock);
+ list_for_each_entry(lut, &obj->lut_list, obj_link) {
+ if (lut->ctx != ctx)
+ continue;
+
+ if (lut->handle != iter.index)
+ continue;
+
+ list_del(&lut->obj_link);
+ break;
+ }
+ spin_unlock(&obj->lut_lock);
+
+ if (&lut->obj_link != &obj->lut_list) {
+ i915_lut_handle_free(lut);
+ radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
+ i915_vma_close(vma);
+ i915_gem_object_put(obj);
+ }
+
+ i915_gem_object_put(obj);
+ }
+ rcu_read_unlock();
+ mutex_unlock(&ctx->lut_mutex);
+}
+
+static struct intel_context *
+lookup_user_engine(struct i915_gem_context *ctx,
+ unsigned long flags,
+ const struct i915_engine_class_instance *ci)
+#define LOOKUP_USER_INDEX BIT(0)
+{
+ int idx;
+
+ if (!!(flags & LOOKUP_USER_INDEX) != i915_gem_context_user_engines(ctx))
+ return ERR_PTR(-EINVAL);
+
+ if (!i915_gem_context_user_engines(ctx)) {
+ struct intel_engine_cs *engine;
+
+ engine = intel_engine_lookup_user(ctx->i915,
+ ci->engine_class,
+ ci->engine_instance);
+ if (!engine)
+ return ERR_PTR(-EINVAL);
+
+ idx = engine->legacy_idx;
+ } else {
+ idx = ci->engine_instance;
+ }
+
+ return i915_gem_context_get_engine(ctx, idx);
+}
+
+static int validate_priority(struct drm_i915_private *i915,
+ const struct drm_i915_gem_context_param *args)
+{
+ s64 priority = args->value;
+
+ if (args->size)
+ return -EINVAL;
+
+ if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY))
+ return -ENODEV;
+
+ if (priority > I915_CONTEXT_MAX_USER_PRIORITY ||
+ priority < I915_CONTEXT_MIN_USER_PRIORITY)
+ return -EINVAL;
+
+ if (priority > I915_CONTEXT_DEFAULT_PRIORITY &&
+ !capable(CAP_SYS_NICE))
+ return -EPERM;
+
+ return 0;
+}
+
+static void proto_context_close(struct drm_i915_private *i915,
+ struct i915_gem_proto_context *pc)
+{
+ int i;
+
+ if (pc->pxp_wakeref)
+ intel_runtime_pm_put(&i915->runtime_pm, pc->pxp_wakeref);
+ if (pc->vm)
+ i915_vm_put(pc->vm);
+ if (pc->user_engines) {
+ for (i = 0; i < pc->num_user_engines; i++)
+ kfree(pc->user_engines[i].siblings);
+ kfree(pc->user_engines);
+ }
+ kfree(pc);
+}
+
+static int proto_context_set_persistence(struct drm_i915_private *i915,
+ struct i915_gem_proto_context *pc,
+ bool persist)
+{
+ if (persist) {
+ /*
+ * Only contexts that are short-lived [that will expire or be
+ * reset] are allowed to survive past termination. We require
+ * hangcheck to ensure that the persistent requests are healthy.
+ */
+ if (!i915->params.enable_hangcheck)
+ return -EINVAL;
+
+ pc->user_flags |= BIT(UCONTEXT_PERSISTENCE);
+ } else {
+ /* To cancel a context we use "preempt-to-idle" */
+ if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+ return -ENODEV;
+
+ /*
+ * If the cancel fails, we then need to reset, cleanly!
+ *
+ * If the per-engine reset fails, all hope is lost! We resort
+ * to a full GPU reset in that unlikely case, but realistically
+ * if the engine could not reset, the full reset does not fare
+ * much better. The damage has been done.
+ *
+ * However, if we cannot reset an engine by itself, we cannot
+ * clean up a hanging persistent context without causing
+ * collateral damage, and we should not pretend we can by
+ * exposing the interface.
+ */
+ if (!intel_has_reset_engine(to_gt(i915)))
+ return -ENODEV;
+
+ pc->user_flags &= ~BIT(UCONTEXT_PERSISTENCE);
+ }
+
+ return 0;
+}
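
Illustrative sketch (user space, not part of this patch): a context can opt out of persistence through the PERSISTENCE parameter, subject to the hangcheck and per-engine reset checks above.

/* Hypothetical: mark an existing context (ctx_id assumed) non-persistent so
 * its requests are cancelled when the context is closed. */
struct drm_i915_gem_context_param param = {
        .ctx_id = ctx_id,
        .param = I915_CONTEXT_PARAM_PERSISTENCE,
        .value = 0,
};

drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &param);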
+
+static int proto_context_set_protected(struct drm_i915_private *i915,
+ struct i915_gem_proto_context *pc,
+ bool protected)
+{
+ int ret = 0;
+
+ if (!protected) {
+ pc->uses_protected_content = false;
+ } else if (!intel_pxp_is_enabled(i915->pxp)) {
+ ret = -ENODEV;
+ } else if ((pc->user_flags & BIT(UCONTEXT_RECOVERABLE)) ||
+ !(pc->user_flags & BIT(UCONTEXT_BANNABLE))) {
+ ret = -EPERM;
+ } else {
+ pc->uses_protected_content = true;
+
+ /*
+ * protected context usage requires the PXP session to be up,
+ * which in turn requires the device to be active.
+ */
+ pc->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ if (!intel_pxp_is_active(i915->pxp))
+ ret = intel_pxp_start(i915->pxp);
+ }
+
+ return ret;
+}
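
Illustrative sketch (user space, not part of this patch): because of the checks above, a protected context has to be created with RECOVERABLE cleared before PROTECTED_CONTENT is enabled, e.g. by chaining create-time SETPARAM extensions in that order.

/* Hypothetical create-time extension chain; extensions are processed in
 * list order, so RECOVERABLE is cleared before PROTECTED_CONTENT is set. */
struct drm_i915_gem_context_create_ext_setparam p_protected = {
        .base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
        .param = { .param = I915_CONTEXT_PARAM_PROTECTED_CONTENT, .value = 1 },
};
struct drm_i915_gem_context_create_ext_setparam p_recoverable = {
        .base = {
                .name = I915_CONTEXT_CREATE_EXT_SETPARAM,
                .next_extension = (uintptr_t)&p_protected,
        },
        .param = { .param = I915_CONTEXT_PARAM_RECOVERABLE, .value = 0 },
};
struct drm_i915_gem_context_create_ext create = {
        .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
        .extensions = (uintptr_t)&p_recoverable,
};

drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);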
+
+static struct i915_gem_proto_context *
+proto_context_create(struct drm_i915_private *i915, unsigned int flags)
+{
+ struct i915_gem_proto_context *pc, *err;
+
+ pc = kzalloc(sizeof(*pc), GFP_KERNEL);
+ if (!pc)
+ return ERR_PTR(-ENOMEM);
+
+ pc->num_user_engines = -1;
+ pc->user_engines = NULL;
+ pc->user_flags = BIT(UCONTEXT_BANNABLE) |
+ BIT(UCONTEXT_RECOVERABLE);
+ if (i915->params.enable_hangcheck)
+ pc->user_flags |= BIT(UCONTEXT_PERSISTENCE);
+ pc->sched.priority = I915_PRIORITY_NORMAL;
+
+ if (flags & I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE) {
+ if (!HAS_EXECLISTS(i915)) {
+ err = ERR_PTR(-EINVAL);
+ goto proto_close;
+ }
+ pc->single_timeline = true;
+ }
+
+ return pc;
+
+proto_close:
+ proto_context_close(i915, pc);
+ return err;
+}
+
+static int proto_context_register_locked(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ u32 *id)
+{
+ int ret;
+ void *old;
+
+ lockdep_assert_held(&fpriv->proto_context_lock);
+
+ ret = xa_alloc(&fpriv->context_xa, id, NULL, xa_limit_32b, GFP_KERNEL);
+ if (ret)
+ return ret;
+
+ old = xa_store(&fpriv->proto_context_xa, *id, pc, GFP_KERNEL);
+ if (xa_is_err(old)) {
+ xa_erase(&fpriv->context_xa, *id);
+ return xa_err(old);
+ }
+ WARN_ON(old);
+
+ return 0;
+}
+
+static int proto_context_register(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ u32 *id)
+{
+ int ret;
+
+ mutex_lock(&fpriv->proto_context_lock);
+ ret = proto_context_register_locked(fpriv, pc, id);
+ mutex_unlock(&fpriv->proto_context_lock);
+
+ return ret;
+}
+
+static struct i915_address_space *
+i915_gem_vm_lookup(struct drm_i915_file_private *file_priv, u32 id)
+{
+ struct i915_address_space *vm;
+
+ xa_lock(&file_priv->vm_xa);
+ vm = xa_load(&file_priv->vm_xa, id);
+ if (vm)
+ kref_get(&vm->ref);
+ xa_unlock(&file_priv->vm_xa);
+
+ return vm;
+}
+
+static int set_proto_ctx_vm(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ const struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_private *i915 = fpriv->i915;
+ struct i915_address_space *vm;
+
+ if (args->size)
+ return -EINVAL;
+
+ if (!HAS_FULL_PPGTT(i915))
+ return -ENODEV;
+
+ if (upper_32_bits(args->value))
+ return -ENOENT;
+
+ vm = i915_gem_vm_lookup(fpriv, args->value);
+ if (!vm)
+ return -ENOENT;
+
+ if (pc->vm)
+ i915_vm_put(pc->vm);
+ pc->vm = vm;
+
+ return 0;
+}
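
Illustrative sketch (user space, not part of this patch): the VM id consumed here comes from the VM_CREATE ioctl.

/* Hypothetical: create a PPGTT address space, then pass vm.vm_id as the
 * value of I915_CONTEXT_PARAM_VM when creating a context. */
struct drm_i915_gem_vm_control vm = { };

drmIoctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &vm);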
+
+struct set_proto_ctx_engines {
+ struct drm_i915_private *i915;
+ unsigned num_engines;
+ struct i915_gem_proto_engine *engines;
+};
+
+static int
+set_proto_ctx_engines_balance(struct i915_user_extension __user *base,
+ void *data)
+{
+ struct i915_context_engines_load_balance __user *ext =
+ container_of_user(base, typeof(*ext), base);
+ const struct set_proto_ctx_engines *set = data;
+ struct drm_i915_private *i915 = set->i915;
+ struct intel_engine_cs **siblings;
+ u16 num_siblings, idx;
+ unsigned int n;
+ int err;
+
+ if (!HAS_EXECLISTS(i915))
+ return -ENODEV;
+
+ if (get_user(idx, &ext->engine_index))
+ return -EFAULT;
+
+ if (idx >= set->num_engines) {
+ drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
+ idx, set->num_engines);
+ return -EINVAL;
+ }
+
+ idx = array_index_nospec(idx, set->num_engines);
+ if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_INVALID) {
+ drm_dbg(&i915->drm,
+ "Invalid placement[%d], already occupied\n", idx);
+ return -EEXIST;
+ }
+
+ if (get_user(num_siblings, &ext->num_siblings))
+ return -EFAULT;
+
+ err = check_user_mbz(&ext->flags);
+ if (err)
+ return err;
+
+ err = check_user_mbz(&ext->mbz64);
+ if (err)
+ return err;
+
+ if (num_siblings == 0)
+ return 0;
+
+ siblings = kmalloc_array(num_siblings, sizeof(*siblings), GFP_KERNEL);
+ if (!siblings)
+ return -ENOMEM;
+
+ for (n = 0; n < num_siblings; n++) {
+ struct i915_engine_class_instance ci;
+
+ if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+ err = -EFAULT;
+ goto err_siblings;
+ }
+
+ siblings[n] = intel_engine_lookup_user(i915,
+ ci.engine_class,
+ ci.engine_instance);
+ if (!siblings[n]) {
+ drm_dbg(&i915->drm,
+ "Invalid sibling[%d]: { class:%d, inst:%d }\n",
+ n, ci.engine_class, ci.engine_instance);
+ err = -EINVAL;
+ goto err_siblings;
+ }
+ }
+
+ if (num_siblings == 1) {
+ set->engines[idx].type = I915_GEM_ENGINE_TYPE_PHYSICAL;
+ set->engines[idx].engine = siblings[0];
+ kfree(siblings);
+ } else {
+ set->engines[idx].type = I915_GEM_ENGINE_TYPE_BALANCED;
+ set->engines[idx].num_siblings = num_siblings;
+ set->engines[idx].siblings = siblings;
+ }
+
+ return 0;
+
+err_siblings:
+ kfree(siblings);
+
+ return err;
+}
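
Illustrative sketch (user space, not part of this patch): the extension parsed above is typically built with the I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE helper from the uAPI header.

/* Hypothetical: a virtual engine in slot 0, balanced over two video engines.
 * This struct is chained off the extensions field of the engines map. */
I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(balance, 2) = {
        .base = { .name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE },
        .engine_index = 0,
        .num_siblings = 2,
        .engines = {
                { .engine_class = I915_ENGINE_CLASS_VIDEO, .engine_instance = 0 },
                { .engine_class = I915_ENGINE_CLASS_VIDEO, .engine_instance = 1 },
        },
};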
+
+static int
+set_proto_ctx_engines_bond(struct i915_user_extension __user *base, void *data)
+{
+ struct i915_context_engines_bond __user *ext =
+ container_of_user(base, typeof(*ext), base);
+ const struct set_proto_ctx_engines *set = data;
+ struct drm_i915_private *i915 = set->i915;
+ struct i915_engine_class_instance ci;
+ struct intel_engine_cs *master;
+ u16 idx, num_bonds;
+ int err, n;
+
+ if (GRAPHICS_VER(i915) >= 12 && !IS_TIGERLAKE(i915) &&
+ !IS_ROCKETLAKE(i915) && !IS_ALDERLAKE_S(i915)) {
+ drm_dbg(&i915->drm,
+ "Bonding not supported on this platform\n");
+ return -ENODEV;
+ }
+
+ if (get_user(idx, &ext->virtual_index))
+ return -EFAULT;
+
+ if (idx >= set->num_engines) {
+ drm_dbg(&i915->drm,
+ "Invalid index for virtual engine: %d >= %d\n",
+ idx, set->num_engines);
+ return -EINVAL;
+ }
+
+ idx = array_index_nospec(idx, set->num_engines);
+ if (set->engines[idx].type == I915_GEM_ENGINE_TYPE_INVALID) {
+ drm_dbg(&i915->drm, "Invalid engine at %d\n", idx);
+ return -EINVAL;
+ }
+
+ if (set->engines[idx].type != I915_GEM_ENGINE_TYPE_PHYSICAL) {
+ drm_dbg(&i915->drm,
+ "Bonding with virtual engines not allowed\n");
+ return -EINVAL;
+ }
+
+ err = check_user_mbz(&ext->flags);
+ if (err)
+ return err;
+
+ for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+ err = check_user_mbz(&ext->mbz64[n]);
+ if (err)
+ return err;
+ }
+
+ if (copy_from_user(&ci, &ext->master, sizeof(ci)))
+ return -EFAULT;
+
+ master = intel_engine_lookup_user(i915,
+ ci.engine_class,
+ ci.engine_instance);
+ if (!master) {
+ drm_dbg(&i915->drm,
+ "Unrecognised master engine: { class:%u, instance:%u }\n",
+ ci.engine_class, ci.engine_instance);
+ return -EINVAL;
+ }
+
+ if (intel_engine_uses_guc(master)) {
+ drm_dbg(&i915->drm, "bonding extension not supported with GuC submission");
+ return -ENODEV;
+ }
+
+ if (get_user(num_bonds, &ext->num_bonds))
+ return -EFAULT;
+
+ for (n = 0; n < num_bonds; n++) {
+ struct intel_engine_cs *bond;
+
+ if (copy_from_user(&ci, &ext->engines[n], sizeof(ci)))
+ return -EFAULT;
+
+ bond = intel_engine_lookup_user(i915,
+ ci.engine_class,
+ ci.engine_instance);
+ if (!bond) {
+ drm_dbg(&i915->drm,
+ "Unrecognised engine[%d] for bonding: { class:%d, instance: %d }\n",
+ n, ci.engine_class, ci.engine_instance);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int
+set_proto_ctx_engines_parallel_submit(struct i915_user_extension __user *base,
+ void *data)
+{
+ struct i915_context_engines_parallel_submit __user *ext =
+ container_of_user(base, typeof(*ext), base);
+ const struct set_proto_ctx_engines *set = data;
+ struct drm_i915_private *i915 = set->i915;
+ struct i915_engine_class_instance prev_engine;
+ u64 flags;
+ int err = 0, n, i, j;
+ u16 slot, width, num_siblings;
+ struct intel_engine_cs **siblings = NULL;
+ intel_engine_mask_t prev_mask;
+
+ if (get_user(slot, &ext->engine_index))
+ return -EFAULT;
+
+ if (get_user(width, &ext->width))
+ return -EFAULT;
+
+ if (get_user(num_siblings, &ext->num_siblings))
+ return -EFAULT;
+
+ if (!intel_uc_uses_guc_submission(&to_gt(i915)->uc) &&
+ num_siblings != 1) {
+ drm_dbg(&i915->drm, "Only 1 sibling (%d) supported in non-GuC mode\n",
+ num_siblings);
+ return -EINVAL;
+ }
+
+ if (slot >= set->num_engines) {
+ drm_dbg(&i915->drm, "Invalid placement value, %d >= %d\n",
+ slot, set->num_engines);
+ return -EINVAL;
+ }
+
+ if (set->engines[slot].type != I915_GEM_ENGINE_TYPE_INVALID) {
+ drm_dbg(&i915->drm,
+ "Invalid placement[%d], already occupied\n", slot);
+ return -EINVAL;
+ }
+
+ if (get_user(flags, &ext->flags))
+ return -EFAULT;
+
+ if (flags) {
+ drm_dbg(&i915->drm, "Unknown flags 0x%02llx", flags);
+ return -EINVAL;
+ }
+
+ for (n = 0; n < ARRAY_SIZE(ext->mbz64); n++) {
+ err = check_user_mbz(&ext->mbz64[n]);
+ if (err)
+ return err;
+ }
+
+ if (width < 2) {
+ drm_dbg(&i915->drm, "Width (%d) < 2\n", width);
+ return -EINVAL;
+ }
+
+ if (num_siblings < 1) {
+ drm_dbg(&i915->drm, "Number siblings (%d) < 1\n",
+ num_siblings);
+ return -EINVAL;
+ }
+
+ siblings = kmalloc_array(num_siblings * width,
+ sizeof(*siblings),
+ GFP_KERNEL);
+ if (!siblings)
+ return -ENOMEM;
+
+ /* Create contexts / engines */
+ for (i = 0; i < width; ++i) {
+ intel_engine_mask_t current_mask = 0;
+
+ for (j = 0; j < num_siblings; ++j) {
+ struct i915_engine_class_instance ci;
+
+ n = i * num_siblings + j;
+ if (copy_from_user(&ci, &ext->engines[n], sizeof(ci))) {
+ err = -EFAULT;
+ goto out_err;
+ }
+
+ siblings[n] =
+ intel_engine_lookup_user(i915, ci.engine_class,
+ ci.engine_instance);
+ if (!siblings[n]) {
+ drm_dbg(&i915->drm,
+ "Invalid sibling[%d]: { class:%d, inst:%d }\n",
+ n, ci.engine_class, ci.engine_instance);
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ /*
+ * We don't support breadcrumb handshake on these
+ * classes
+ */
+ if (siblings[n]->class == RENDER_CLASS ||
+ siblings[n]->class == COMPUTE_CLASS) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ if (n) {
+ if (prev_engine.engine_class !=
+ ci.engine_class) {
+ drm_dbg(&i915->drm,
+ "Mismatched class %d, %d\n",
+ prev_engine.engine_class,
+ ci.engine_class);
+ err = -EINVAL;
+ goto out_err;
+ }
+ }
+
+ prev_engine = ci;
+ current_mask |= siblings[n]->logical_mask;
+ }
+
+ if (i > 0) {
+ if (current_mask != prev_mask << 1) {
+ drm_dbg(&i915->drm,
+ "Non contiguous logical mask 0x%x, 0x%x\n",
+ prev_mask, current_mask);
+ err = -EINVAL;
+ goto out_err;
+ }
+ }
+ prev_mask = current_mask;
+ }
+
+ set->engines[slot].type = I915_GEM_ENGINE_TYPE_PARALLEL;
+ set->engines[slot].num_siblings = num_siblings;
+ set->engines[slot].width = width;
+ set->engines[slot].siblings = siblings;
+
+ return 0;
+
+out_err:
+ kfree(siblings);
+
+ return err;
+}
+
+static const i915_user_extension_fn set_proto_ctx_engines_extensions[] = {
+ [I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE] = set_proto_ctx_engines_balance,
+ [I915_CONTEXT_ENGINES_EXT_BOND] = set_proto_ctx_engines_bond,
+ [I915_CONTEXT_ENGINES_EXT_PARALLEL_SUBMIT] =
+ set_proto_ctx_engines_parallel_submit,
+};
+
+static int set_proto_ctx_engines(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ const struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_private *i915 = fpriv->i915;
+ struct set_proto_ctx_engines set = { .i915 = i915 };
+ struct i915_context_param_engines __user *user =
+ u64_to_user_ptr(args->value);
+ unsigned int n;
+ u64 extensions;
+ int err;
+
+ if (pc->num_user_engines >= 0) {
+ drm_dbg(&i915->drm, "Cannot set engines twice");
+ return -EINVAL;
+ }
+
+ if (args->size < sizeof(*user) ||
+ !IS_ALIGNED(args->size - sizeof(*user), sizeof(*user->engines))) {
+ drm_dbg(&i915->drm, "Invalid size for engine array: %d\n",
+ args->size);
+ return -EINVAL;
+ }
+
+ set.num_engines = (args->size - sizeof(*user)) / sizeof(*user->engines);
+ /* RING_MASK has no shift so we can use it directly here */
+ if (set.num_engines > I915_EXEC_RING_MASK + 1)
+ return -EINVAL;
+
+ set.engines = kmalloc_array(set.num_engines, sizeof(*set.engines), GFP_KERNEL);
+ if (!set.engines)
+ return -ENOMEM;
+
+ for (n = 0; n < set.num_engines; n++) {
+ struct i915_engine_class_instance ci;
+ struct intel_engine_cs *engine;
+
+ if (copy_from_user(&ci, &user->engines[n], sizeof(ci))) {
+ kfree(set.engines);
+ return -EFAULT;
+ }
+
+ memset(&set.engines[n], 0, sizeof(set.engines[n]));
+
+ if (ci.engine_class == (u16)I915_ENGINE_CLASS_INVALID &&
+ ci.engine_instance == (u16)I915_ENGINE_CLASS_INVALID_NONE)
+ continue;
+
+ engine = intel_engine_lookup_user(i915,
+ ci.engine_class,
+ ci.engine_instance);
+ if (!engine) {
+ drm_dbg(&i915->drm,
+ "Invalid engine[%d]: { class:%d, instance:%d }\n",
+ n, ci.engine_class, ci.engine_instance);
+ kfree(set.engines);
+ return -ENOENT;
+ }
+
+ set.engines[n].type = I915_GEM_ENGINE_TYPE_PHYSICAL;
+ set.engines[n].engine = engine;
+ }
+
+ err = -EFAULT;
+ if (!get_user(extensions, &user->extensions))
+ err = i915_user_extensions(u64_to_user_ptr(extensions),
+ set_proto_ctx_engines_extensions,
+ ARRAY_SIZE(set_proto_ctx_engines_extensions),
+ &set);
+ if (err) {
+ kfree(set.engines);
+ return err;
+ }
+
+ pc->num_user_engines = set.num_engines;
+ pc->user_engines = set.engines;
+
+ return 0;
+}
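
Illustrative sketch (user space, not part of this patch): the engine array parsed above is normally declared with I915_DEFINE_CONTEXT_PARAM_ENGINES and supplied as I915_CONTEXT_PARAM_ENGINES at context-creation time.

/* Hypothetical two-engine map: slot 0 = render, slot 1 = copy. */
I915_DEFINE_CONTEXT_PARAM_ENGINES(engine_map, 2) = {
        .engines = {
                { .engine_class = I915_ENGINE_CLASS_RENDER, .engine_instance = 0 },
                { .engine_class = I915_ENGINE_CLASS_COPY, .engine_instance = 0 },
        },
};

/* engine_map is then referenced by a drm_i915_gem_context_create_ext_setparam
 * with .param = I915_CONTEXT_PARAM_ENGINES, .size = sizeof(engine_map),
 * .value = (uintptr_t)&engine_map. */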
+
+static int set_proto_ctx_sseu(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_private *i915 = fpriv->i915;
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_sseu *sseu;
+ int ret;
+
+ if (args->size < sizeof(user_sseu))
+ return -EINVAL;
+
+ if (GRAPHICS_VER(i915) != 11)
+ return -ENODEV;
+
+ if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+ if (user_sseu.rsvd)
+ return -EINVAL;
+
+ if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+ return -EINVAL;
+
+ if (!!(user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX) != (pc->num_user_engines >= 0))
+ return -EINVAL;
+
+ if (pc->num_user_engines >= 0) {
+ int idx = user_sseu.engine.engine_instance;
+ struct i915_gem_proto_engine *pe;
+
+ if (idx >= pc->num_user_engines)
+ return -EINVAL;
+
+ idx = array_index_nospec(idx, pc->num_user_engines);
+ pe = &pc->user_engines[idx];
+
+ /* Only render engine supports RPCS configuration. */
+ if (pe->engine->class != RENDER_CLASS)
+ return -EINVAL;
+
+ sseu = &pe->sseu;
+ } else {
+ /* Only render engine supports RPCS configuration. */
+ if (user_sseu.engine.engine_class != I915_ENGINE_CLASS_RENDER)
+ return -EINVAL;
+
+ /* There is only one render engine */
+ if (user_sseu.engine.engine_instance != 0)
+ return -EINVAL;
+
+ sseu = &pc->legacy_rcs_sseu;
+ }
+
+ ret = i915_gem_user_to_context_sseu(to_gt(i915), &user_sseu, sseu);
+ if (ret)
+ return ret;
+
+ args->size = sizeof(user_sseu);
+
+ return 0;
+}
+
+static int set_proto_ctx_param(struct drm_i915_file_private *fpriv,
+ struct i915_gem_proto_context *pc,
+ struct drm_i915_gem_context_param *args)
+{
+ int ret = 0;
+
+ switch (args->param) {
+ case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (args->value)
+ pc->user_flags |= BIT(UCONTEXT_NO_ERROR_CAPTURE);
+ else
+ pc->user_flags &= ~BIT(UCONTEXT_NO_ERROR_CAPTURE);
+ break;
+
+ case I915_CONTEXT_PARAM_BANNABLE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (!capable(CAP_SYS_ADMIN) && !args->value)
+ ret = -EPERM;
+ else if (args->value)
+ pc->user_flags |= BIT(UCONTEXT_BANNABLE);
+ else if (pc->uses_protected_content)
+ ret = -EPERM;
+ else
+ pc->user_flags &= ~BIT(UCONTEXT_BANNABLE);
+ break;
+
+ case I915_CONTEXT_PARAM_RECOVERABLE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (!args->value)
+ pc->user_flags &= ~BIT(UCONTEXT_RECOVERABLE);
+ else if (pc->uses_protected_content)
+ ret = -EPERM;
+ else
+ pc->user_flags |= BIT(UCONTEXT_RECOVERABLE);
+ break;
+
+ case I915_CONTEXT_PARAM_PRIORITY:
+ ret = validate_priority(fpriv->i915, args);
+ if (!ret)
+ pc->sched.priority = args->value;
+ break;
+
+ case I915_CONTEXT_PARAM_SSEU:
+ ret = set_proto_ctx_sseu(fpriv, pc, args);
+ break;
+
+ case I915_CONTEXT_PARAM_VM:
+ ret = set_proto_ctx_vm(fpriv, pc, args);
+ break;
+
+ case I915_CONTEXT_PARAM_ENGINES:
+ ret = set_proto_ctx_engines(fpriv, pc, args);
+ break;
+
+ case I915_CONTEXT_PARAM_PERSISTENCE:
+ if (args->size)
+ ret = -EINVAL;
+ else
+ ret = proto_context_set_persistence(fpriv->i915, pc,
+ args->value);
+ break;
+
+ case I915_CONTEXT_PARAM_PROTECTED_CONTENT:
+ ret = proto_context_set_protected(fpriv->i915, pc,
+ args->value);
+ break;
+
+ case I915_CONTEXT_PARAM_NO_ZEROMAP:
+ case I915_CONTEXT_PARAM_BAN_PERIOD:
+ case I915_CONTEXT_PARAM_RINGSIZE:
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int intel_context_set_gem(struct intel_context *ce,
+ struct i915_gem_context *ctx,
+ struct intel_sseu sseu)
+{
+ int ret = 0;
+
+ GEM_BUG_ON(rcu_access_pointer(ce->gem_context));
+ RCU_INIT_POINTER(ce->gem_context, ctx);
+
+ GEM_BUG_ON(intel_context_is_pinned(ce));
+
+ if (ce->engine->class == COMPUTE_CLASS)
+ ce->ring_size = SZ_512K;
+ else
+ ce->ring_size = SZ_16K;
+
+ i915_vm_put(ce->vm);
+ ce->vm = i915_gem_context_get_eb_vm(ctx);
+
+ if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
+ intel_engine_has_timeslices(ce->engine) &&
+ intel_engine_has_semaphores(ce->engine))
+ __set_bit(CONTEXT_USE_SEMAPHORES, &ce->flags);
+
+ if (CONFIG_DRM_I915_REQUEST_TIMEOUT &&
+ ctx->i915->params.request_timeout_ms) {
+ unsigned int timeout_ms = ctx->i915->params.request_timeout_ms;
+
+ intel_context_set_watchdog_us(ce, (u64)timeout_ms * 1000);
+ }
+
+ /* A valid SSEU has no zero fields */
+ if (sseu.slice_mask && !WARN_ON(ce->engine->class != RENDER_CLASS))
+ ret = intel_context_reconfigure_sseu(ce, sseu);
+
+ return ret;
+}
+
+static void __unpin_engines(struct i915_gem_engines *e, unsigned int count)
+{
+ while (count--) {
+ struct intel_context *ce = e->engines[count], *child;
+
+ if (!ce || !test_bit(CONTEXT_PERMA_PIN, &ce->flags))
+ continue;
+
+ for_each_child(ce, child)
+ intel_context_unpin(child);
+ intel_context_unpin(ce);
+ }
+}
+
+static void unpin_engines(struct i915_gem_engines *e)
+{
+ __unpin_engines(e, e->num_engines);
+}
+
+static void __free_engines(struct i915_gem_engines *e, unsigned int count)
+{
+ while (count--) {
+ if (!e->engines[count])
+ continue;
+
+ intel_context_put(e->engines[count]);
+ }
+ kfree(e);
+}
+
+static void free_engines(struct i915_gem_engines *e)
+{
+ __free_engines(e, e->num_engines);
+}
+
+static void free_engines_rcu(struct rcu_head *rcu)
+{
+ struct i915_gem_engines *engines =
+ container_of(rcu, struct i915_gem_engines, rcu);
+
+ i915_sw_fence_fini(&engines->fence);
+ free_engines(engines);
+}
+
+static void accumulate_runtime(struct i915_drm_client *client,
+ struct i915_gem_engines *engines)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+
+ if (!client)
+ return;
+
+ /* Transfer accumulated runtime to the parent GEM context. */
+ for_each_gem_engine(ce, engines, it) {
+ unsigned int class = ce->engine->uabi_class;
+
+ GEM_BUG_ON(class >= ARRAY_SIZE(client->past_runtime));
+ atomic64_add(intel_context_get_total_runtime_ns(ce),
+ &client->past_runtime[class]);
+ }
+}
+
+static int
+engines_notify(struct i915_sw_fence *fence, enum i915_sw_fence_notify state)
+{
+ struct i915_gem_engines *engines =
+ container_of(fence, typeof(*engines), fence);
+ struct i915_gem_context *ctx = engines->ctx;
+
+ switch (state) {
+ case FENCE_COMPLETE:
+ if (!list_empty(&engines->link)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->stale.lock, flags);
+ list_del(&engines->link);
+ spin_unlock_irqrestore(&ctx->stale.lock, flags);
+ }
+ accumulate_runtime(ctx->client, engines);
+ i915_gem_context_put(ctx);
+
+ break;
+
+ case FENCE_FREE:
+ init_rcu_head(&engines->rcu);
+ call_rcu(&engines->rcu, free_engines_rcu);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static struct i915_gem_engines *alloc_engines(unsigned int count)
+{
+ struct i915_gem_engines *e;
+
+ e = kzalloc(struct_size(e, engines, count), GFP_KERNEL);
+ if (!e)
+ return NULL;
+
+ i915_sw_fence_init(&e->fence, engines_notify);
+ return e;
+}
+
+static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx,
+ struct intel_sseu rcs_sseu)
+{
+ const unsigned int max = I915_NUM_ENGINES;
+ struct intel_engine_cs *engine;
+ struct i915_gem_engines *e, *err;
+
+ e = alloc_engines(max);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+
+ for_each_uabi_engine(engine, ctx->i915) {
+ struct intel_context *ce;
+ struct intel_sseu sseu = {};
+ int ret;
+
+ if (engine->legacy_idx == INVALID_ENGINE)
+ continue;
+
+ GEM_BUG_ON(engine->legacy_idx >= max);
+ GEM_BUG_ON(e->engines[engine->legacy_idx]);
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = ERR_CAST(ce);
+ goto free_engines;
+ }
+
+ e->engines[engine->legacy_idx] = ce;
+ e->num_engines = max(e->num_engines, engine->legacy_idx + 1);
+
+ if (engine->class == RENDER_CLASS)
+ sseu = rcs_sseu;
+
+ ret = intel_context_set_gem(ce, ctx, sseu);
+ if (ret) {
+ err = ERR_PTR(ret);
+ goto free_engines;
+ }
+
+ }
+
+ return e;
+
+free_engines:
+ free_engines(e);
+ return err;
+}
+
+static int perma_pin_contexts(struct intel_context *ce)
+{
+ struct intel_context *child;
+ int i = 0, j = 0, ret;
+
+ GEM_BUG_ON(!intel_context_is_parent(ce));
+
+ ret = intel_context_pin(ce);
+ if (unlikely(ret))
+ return ret;
+
+ for_each_child(ce, child) {
+ ret = intel_context_pin(child);
+ if (unlikely(ret))
+ goto unwind;
+ ++i;
+ }
+
+ set_bit(CONTEXT_PERMA_PIN, &ce->flags);
+
+ return 0;
+
+unwind:
+ intel_context_unpin(ce);
+ for_each_child(ce, child) {
+ if (j++ < i)
+ intel_context_unpin(child);
+ else
+ break;
+ }
+
+ return ret;
+}
+
+static struct i915_gem_engines *user_engines(struct i915_gem_context *ctx,
+ unsigned int num_engines,
+ struct i915_gem_proto_engine *pe)
+{
+ struct i915_gem_engines *e, *err;
+ unsigned int n;
+
+ e = alloc_engines(num_engines);
+ if (!e)
+ return ERR_PTR(-ENOMEM);
+ e->num_engines = num_engines;
+
+ for (n = 0; n < num_engines; n++) {
+ struct intel_context *ce, *child;
+ int ret;
+
+ switch (pe[n].type) {
+ case I915_GEM_ENGINE_TYPE_PHYSICAL:
+ ce = intel_context_create(pe[n].engine);
+ break;
+
+ case I915_GEM_ENGINE_TYPE_BALANCED:
+ ce = intel_engine_create_virtual(pe[n].siblings,
+ pe[n].num_siblings, 0);
+ break;
+
+ case I915_GEM_ENGINE_TYPE_PARALLEL:
+ ce = intel_engine_create_parallel(pe[n].siblings,
+ pe[n].num_siblings,
+ pe[n].width);
+ break;
+
+ case I915_GEM_ENGINE_TYPE_INVALID:
+ default:
+ GEM_WARN_ON(pe[n].type != I915_GEM_ENGINE_TYPE_INVALID);
+ continue;
+ }
+
+ if (IS_ERR(ce)) {
+ err = ERR_CAST(ce);
+ goto free_engines;
+ }
+
+ e->engines[n] = ce;
+
+ ret = intel_context_set_gem(ce, ctx, pe->sseu);
+ if (ret) {
+ err = ERR_PTR(ret);
+ goto free_engines;
+ }
+ for_each_child(ce, child) {
+ ret = intel_context_set_gem(child, ctx, pe->sseu);
+ if (ret) {
+ err = ERR_PTR(ret);
+ goto free_engines;
+ }
+ }
+
+ /*
+ * XXX: Must be done after calling intel_context_set_gem as that
+ * function changes the ring size. The ring is allocated when
+ * the context is pinned. If the ring size is changed after
+ * allocation we have a mismatch of the ring size and will cause
+ * the context to hang. Presumably with a bit of reordering we
+ * could move the perma-pin step to the backend function
+ * intel_engine_create_parallel.
+ */
+ if (pe[n].type == I915_GEM_ENGINE_TYPE_PARALLEL) {
+ ret = perma_pin_contexts(ce);
+ if (ret) {
+ err = ERR_PTR(ret);
+ goto free_engines;
+ }
+ }
+ }
+
+ return e;
+
+free_engines:
+ free_engines(e);
+ return err;
+}
+
+static void i915_gem_context_release_work(struct work_struct *work)
+{
+ struct i915_gem_context *ctx = container_of(work, typeof(*ctx),
+ release_work);
+ struct i915_address_space *vm;
+
+ trace_i915_context_free(ctx);
+ GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
+
+ spin_lock(&ctx->i915->gem.contexts.lock);
+ list_del(&ctx->link);
+ spin_unlock(&ctx->i915->gem.contexts.lock);
+
+ if (ctx->syncobj)
+ drm_syncobj_put(ctx->syncobj);
+
+ vm = ctx->vm;
+ if (vm)
+ i915_vm_put(vm);
+
+ if (ctx->pxp_wakeref)
+ intel_runtime_pm_put(&ctx->i915->runtime_pm, ctx->pxp_wakeref);
+
+ if (ctx->client)
+ i915_drm_client_put(ctx->client);
+
+ mutex_destroy(&ctx->engines_mutex);
+ mutex_destroy(&ctx->lut_mutex);
+
+ put_pid(ctx->pid);
+ mutex_destroy(&ctx->mutex);
+
+ kfree_rcu(ctx, rcu);
+}
+
+void i915_gem_context_release(struct kref *ref)
+{
+ struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
+
+ queue_work(ctx->i915->wq, &ctx->release_work);
+}
+
+static inline struct i915_gem_engines *
+__context_engines_static(const struct i915_gem_context *ctx)
+{
+ return rcu_dereference_protected(ctx->engines, true);
+}
+
+static void __reset_context(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ intel_gt_handle_error(engine->gt, engine->mask, 0,
+ "context closure in %s", ctx->name);
+}
+
+static bool __cancel_engine(struct intel_engine_cs *engine)
+{
+ /*
+ * Send a "high priority pulse" down the engine to cause the
+ * current request to be momentarily preempted. (If it fails to
+ * be preempted, it will be reset). As we have marked our context
+ * as banned, any incomplete request, including any running, will
+ * be skipped following the preemption.
+ *
+ * If there is no hangchecking (one of the reasons why we try to
+ * cancel the context) and no forced preemption, there may be no
+ * means by which we reset the GPU and evict the persistent hog.
+ * Ergo if we are unable to inject a preemptive pulse that can
+ * kill the banned context, we fallback to doing a local reset
+ * instead.
+ */
+ return intel_engine_pulse(engine) == 0;
+}
+
+static struct intel_engine_cs *active_engine(struct intel_context *ce)
+{
+ struct intel_engine_cs *engine = NULL;
+ struct i915_request *rq;
+
+ if (intel_context_has_inflight(ce))
+ return intel_context_inflight(ce);
+
+ if (!ce->timeline)
+ return NULL;
+
+ /*
+ * rq->link is only SLAB_TYPESAFE_BY_RCU, we need to hold a reference
+ * to the request to prevent it being transferred to a new timeline
+ * (and onto a new timeline->requests list).
+ */
+ rcu_read_lock();
+ list_for_each_entry_reverse(rq, &ce->timeline->requests, link) {
+ bool found;
+
+ /* timeline is already completed up to this point? */
+ if (!i915_request_get_rcu(rq))
+ break;
+
+ /* Check with the backend if the request is inflight */
+ found = true;
+ if (likely(rcu_access_pointer(rq->timeline) == ce->timeline))
+ found = i915_request_active_engine(rq, &engine);
+
+ i915_request_put(rq);
+ if (found)
+ break;
+ }
+ rcu_read_unlock();
+
+ return engine;
+}
+
+static void
+kill_engines(struct i915_gem_engines *engines, bool exit, bool persistent)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+
+ /*
+ * Map the user's engine back to the actual engines; one virtual
+ * engine will be mapped to multiple engines, and using ctx->engine[]
+ * the same engine may have multiple instances in the user's map.
+ * However, we only care about pending requests, so only include
+ * engines on which there are incomplete requests.
+ */
+ for_each_gem_engine(ce, engines, it) {
+ struct intel_engine_cs *engine;
+
+ if ((exit || !persistent) && intel_context_revoke(ce))
+ continue; /* Already marked. */
+
+ /*
+ * Check the current active state of this context; if we
+ * are currently executing on the GPU we need to evict
+ * ourselves. On the other hand, if we haven't yet been
+ * submitted to the GPU or if everything is complete,
+ * we have nothing to do.
+ */
+ engine = active_engine(ce);
+
+ /* First attempt to gracefully cancel the context */
+ if (engine && !__cancel_engine(engine) && (exit || !persistent))
+ /*
+ * If we are unable to send a preemptive pulse to bump
+ * the context from the GPU, we have to resort to a full
+ * reset. We hope the collateral damage is worth it.
+ */
+ __reset_context(engines->ctx, engine);
+ }
+}
+
+static void kill_context(struct i915_gem_context *ctx)
+{
+ struct i915_gem_engines *pos, *next;
+
+ spin_lock_irq(&ctx->stale.lock);
+ GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
+ list_for_each_entry_safe(pos, next, &ctx->stale.engines, link) {
+ if (!i915_sw_fence_await(&pos->fence)) {
+ list_del_init(&pos->link);
+ continue;
+ }
+
+ spin_unlock_irq(&ctx->stale.lock);
+
+ kill_engines(pos, !ctx->i915->params.enable_hangcheck,
+ i915_gem_context_is_persistent(ctx));
+
+ spin_lock_irq(&ctx->stale.lock);
+ GEM_BUG_ON(i915_sw_fence_signaled(&pos->fence));
+ list_safe_reset_next(pos, next, link);
+ list_del_init(&pos->link); /* decouple from FENCE_COMPLETE */
+
+ i915_sw_fence_complete(&pos->fence);
+ }
+ spin_unlock_irq(&ctx->stale.lock);
+}
+
+static void engines_idle_release(struct i915_gem_context *ctx,
+ struct i915_gem_engines *engines)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+
+ INIT_LIST_HEAD(&engines->link);
+
+ engines->ctx = i915_gem_context_get(ctx);
+
+ for_each_gem_engine(ce, engines, it) {
+ int err;
+
+ /* serialises with execbuf */
+ intel_context_close(ce);
+ if (!intel_context_pin_if_active(ce))
+ continue;
+
+ /* Wait until context is finally scheduled out and retired */
+ err = i915_sw_fence_await_active(&engines->fence,
+ &ce->active,
+ I915_ACTIVE_AWAIT_BARRIER);
+ intel_context_unpin(ce);
+ if (err)
+ goto kill;
+ }
+
+ spin_lock_irq(&ctx->stale.lock);
+ if (!i915_gem_context_is_closed(ctx))
+ list_add_tail(&engines->link, &ctx->stale.engines);
+ spin_unlock_irq(&ctx->stale.lock);
+
+kill:
+ if (list_empty(&engines->link)) /* raced, already closed */
+ kill_engines(engines, true,
+ i915_gem_context_is_persistent(ctx));
+
+ i915_sw_fence_commit(&engines->fence);
+}
+
+static void set_closed_name(struct i915_gem_context *ctx)
+{
+ char *s;
+
+ /* Replace '[]' with '<>' to indicate closed in debug prints */
+
+ s = strrchr(ctx->name, '[');
+ if (!s)
+ return;
+
+ *s = '<';
+
+ s = strchr(s + 1, ']');
+ if (s)
+ *s = '>';
+}
+
+static void context_close(struct i915_gem_context *ctx)
+{
+ struct i915_drm_client *client;
+
+ /* Flush any concurrent set_engines() */
+ mutex_lock(&ctx->engines_mutex);
+ unpin_engines(__context_engines_static(ctx));
+ engines_idle_release(ctx, rcu_replace_pointer(ctx->engines, NULL, 1));
+ i915_gem_context_set_closed(ctx);
+ mutex_unlock(&ctx->engines_mutex);
+
+ mutex_lock(&ctx->mutex);
+
+ set_closed_name(ctx);
+
+ /*
+ * The LUT uses the VMA as a backpointer to unref the object,
+ * so we need to clear the LUT before we close all the VMA (inside
+ * the ppgtt).
+ */
+ lut_close(ctx);
+
+ ctx->file_priv = ERR_PTR(-EBADF);
+
+ client = ctx->client;
+ if (client) {
+ spin_lock(&client->ctx_lock);
+ list_del_rcu(&ctx->client_link);
+ spin_unlock(&client->ctx_lock);
+ }
+
+ mutex_unlock(&ctx->mutex);
+
+ /*
+ * If the user has disabled hangchecking, we cannot be sure that
+ * the batches will ever complete after the context is closed,
+ * keeping the context and all resources pinned forever. So in this
+ * case we opt to forcibly kill off all remaining requests on
+ * context close.
+ */
+ kill_context(ctx);
+
+ i915_gem_context_put(ctx);
+}
+
+static int __context_set_persistence(struct i915_gem_context *ctx, bool state)
+{
+ if (i915_gem_context_is_persistent(ctx) == state)
+ return 0;
+
+ if (state) {
+ /*
+ * Only contexts that are short-lived [that will expire or be
+ * reset] are allowed to survive past termination. We require
+ * hangcheck to ensure that the persistent requests are healthy.
+ */
+ if (!ctx->i915->params.enable_hangcheck)
+ return -EINVAL;
+
+ i915_gem_context_set_persistence(ctx);
+ } else {
+ /* To cancel a context we use "preempt-to-idle" */
+ if (!(ctx->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION))
+ return -ENODEV;
+
+ /*
+ * If the cancel fails, we then need to reset, cleanly!
+ *
+ * If the per-engine reset fails, all hope is lost! We resort
+ * to a full GPU reset in that unlikely case, but realistically
+ * if the engine could not reset, the full reset does not fare
+ * much better. The damage has been done.
+ *
+ * However, if we cannot reset an engine by itself, we cannot
+ * clean up a hanging persistent context without causing
+ * collateral damage, and we should not pretend we can by
+ * exposing the interface.
+ */
+ if (!intel_has_reset_engine(to_gt(ctx->i915)))
+ return -ENODEV;
+
+ i915_gem_context_clear_persistence(ctx);
+ }
+
+ return 0;
+}
+
+static struct i915_gem_context *
+i915_gem_create_context(struct drm_i915_private *i915,
+ const struct i915_gem_proto_context *pc)
+{
+ struct i915_gem_context *ctx;
+ struct i915_address_space *vm = NULL;
+ struct i915_gem_engines *e;
+ int err;
+ int i;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+
+ kref_init(&ctx->ref);
+ ctx->i915 = i915;
+ ctx->sched = pc->sched;
+ mutex_init(&ctx->mutex);
+ INIT_LIST_HEAD(&ctx->link);
+ INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
+
+ spin_lock_init(&ctx->stale.lock);
+ INIT_LIST_HEAD(&ctx->stale.engines);
+
+ if (pc->vm) {
+ vm = i915_vm_get(pc->vm);
+ } else if (HAS_FULL_PPGTT(i915)) {
+ struct i915_ppgtt *ppgtt;
+
+ ppgtt = i915_ppgtt_create(to_gt(i915), 0);
+ if (IS_ERR(ppgtt)) {
+ drm_dbg(&i915->drm, "PPGTT setup failed (%ld)\n",
+ PTR_ERR(ppgtt));
+ err = PTR_ERR(ppgtt);
+ goto err_ctx;
+ }
+ vm = &ppgtt->vm;
+ }
+ if (vm)
+ ctx->vm = vm;
+
+ mutex_init(&ctx->engines_mutex);
+ if (pc->num_user_engines >= 0) {
+ i915_gem_context_set_user_engines(ctx);
+ e = user_engines(ctx, pc->num_user_engines, pc->user_engines);
+ } else {
+ i915_gem_context_clear_user_engines(ctx);
+ e = default_engines(ctx, pc->legacy_rcs_sseu);
+ }
+ if (IS_ERR(e)) {
+ err = PTR_ERR(e);
+ goto err_vm;
+ }
+ RCU_INIT_POINTER(ctx->engines, e);
+
+ INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+ mutex_init(&ctx->lut_mutex);
+
+ /* NB: Mark all slices as needing a remap so that when the context first
+ * loads it will restore whatever remap state already exists. If there
+ * is no remap info, it will be a NOP. */
+ ctx->remap_slice = ALL_L3_SLICES(i915);
+
+ ctx->user_flags = pc->user_flags;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp); i++)
+ ctx->hang_timestamp[i] = jiffies - CONTEXT_FAST_HANG_JIFFIES;
+
+ if (pc->single_timeline) {
+ err = drm_syncobj_create(&ctx->syncobj,
+ DRM_SYNCOBJ_CREATE_SIGNALED,
+ NULL);
+ if (err)
+ goto err_engines;
+ }
+
+ if (pc->uses_protected_content) {
+ ctx->pxp_wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ ctx->uses_protected_content = true;
+ }
+
+ trace_i915_context_create(ctx);
+
+ return ctx;
+
+err_engines:
+ free_engines(e);
+err_vm:
+ if (ctx->vm)
+ i915_vm_put(ctx->vm);
+err_ctx:
+ kfree(ctx);
+ return ERR_PTR(err);
+}
+
+static void init_contexts(struct i915_gem_contexts *gc)
+{
+ spin_lock_init(&gc->lock);
+ INIT_LIST_HEAD(&gc->list);
+}
+
+void i915_gem_init__contexts(struct drm_i915_private *i915)
+{
+ init_contexts(&i915->gem.contexts);
+}
+
+/*
+ * Note that this implicitly consumes the ctx reference, by placing
+ * the ctx in the context_xa.
+ */
+static void gem_context_register(struct i915_gem_context *ctx,
+ struct drm_i915_file_private *fpriv,
+ u32 id)
+{
+ struct drm_i915_private *i915 = ctx->i915;
+ void *old;
+
+ ctx->file_priv = fpriv;
+
+ ctx->pid = get_task_pid(current, PIDTYPE_PID);
+ ctx->client = i915_drm_client_get(fpriv->client);
+
+ snprintf(ctx->name, sizeof(ctx->name), "%s[%d]",
+ current->comm, pid_nr(ctx->pid));
+
+ spin_lock(&ctx->client->ctx_lock);
+ list_add_tail_rcu(&ctx->client_link, &ctx->client->ctx_list);
+ spin_unlock(&ctx->client->ctx_lock);
+
+ spin_lock(&i915->gem.contexts.lock);
+ list_add_tail(&ctx->link, &i915->gem.contexts.list);
+ spin_unlock(&i915->gem.contexts.lock);
+
+ /* And finally expose ourselves to userspace via the idr */
+ old = xa_store(&fpriv->context_xa, id, ctx, GFP_KERNEL);
+ WARN_ON(old);
+}
+
+int i915_gem_context_open(struct drm_i915_private *i915,
+ struct drm_file *file)
+{
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct i915_gem_proto_context *pc;
+ struct i915_gem_context *ctx;
+ int err;
+
+ mutex_init(&file_priv->proto_context_lock);
+ xa_init_flags(&file_priv->proto_context_xa, XA_FLAGS_ALLOC);
+
+ /* 0 reserved for the default context */
+ xa_init_flags(&file_priv->context_xa, XA_FLAGS_ALLOC1);
+
+ /* 0 reserved for invalid/unassigned ppgtt */
+ xa_init_flags(&file_priv->vm_xa, XA_FLAGS_ALLOC1);
+
+ pc = proto_context_create(i915, 0);
+ if (IS_ERR(pc)) {
+ err = PTR_ERR(pc);
+ goto err;
+ }
+
+ ctx = i915_gem_create_context(i915, pc);
+ proto_context_close(i915, pc);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto err;
+ }
+
+ gem_context_register(ctx, file_priv, 0);
+
+ return 0;
+
+err:
+ xa_destroy(&file_priv->vm_xa);
+ xa_destroy(&file_priv->context_xa);
+ xa_destroy(&file_priv->proto_context_xa);
+ mutex_destroy(&file_priv->proto_context_lock);
+ return err;
+}
+
+void i915_gem_context_close(struct drm_file *file)
+{
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct i915_gem_proto_context *pc;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ unsigned long idx;
+
+ xa_for_each(&file_priv->proto_context_xa, idx, pc)
+ proto_context_close(file_priv->i915, pc);
+ xa_destroy(&file_priv->proto_context_xa);
+ mutex_destroy(&file_priv->proto_context_lock);
+
+ xa_for_each(&file_priv->context_xa, idx, ctx)
+ context_close(ctx);
+ xa_destroy(&file_priv->context_xa);
+
+ xa_for_each(&file_priv->vm_xa, idx, vm)
+ i915_vm_put(vm);
+ xa_destroy(&file_priv->vm_xa);
+}
+
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_vm_control *args = data;
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct i915_ppgtt *ppgtt;
+ u32 id;
+ int err;
+
+ if (!HAS_FULL_PPGTT(i915))
+ return -ENODEV;
+
+ if (args->flags)
+ return -EINVAL;
+
+ ppgtt = i915_ppgtt_create(to_gt(i915), 0);
+ if (IS_ERR(ppgtt))
+ return PTR_ERR(ppgtt);
+
+ if (args->extensions) {
+ err = i915_user_extensions(u64_to_user_ptr(args->extensions),
+ NULL, 0,
+ ppgtt);
+ if (err)
+ goto err_put;
+ }
+
+ err = xa_alloc(&file_priv->vm_xa, &id, &ppgtt->vm,
+ xa_limit_32b, GFP_KERNEL);
+ if (err)
+ goto err_put;
+
+ GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
+ args->vm_id = id;
+ return 0;
+
+err_put:
+ i915_vm_put(&ppgtt->vm);
+ return err;
+}
+
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_gem_vm_control *args = data;
+ struct i915_address_space *vm;
+
+ if (args->flags)
+ return -EINVAL;
+
+ if (args->extensions)
+ return -EINVAL;
+
+ vm = xa_erase(&file_priv->vm_xa, args->vm_id);
+ if (!vm)
+ return -ENOENT;
+
+ i915_vm_put(vm);
+ return 0;
+}
+
+static int get_ppgtt(struct drm_i915_file_private *file_priv,
+ struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct i915_address_space *vm;
+ int err;
+ u32 id;
+
+ if (!i915_gem_context_has_full_ppgtt(ctx))
+ return -ENODEV;
+
+ vm = ctx->vm;
+ GEM_BUG_ON(!vm);
+
+ /*
+ * Get a reference for the allocated handle. Once the handle is
+ * visible in the vm_xa table, userspace could try to close it
+ * from under our feet, so we need to hold the extra reference
+ * first.
+ */
+ i915_vm_get(vm);
+
+ err = xa_alloc(&file_priv->vm_xa, &id, vm, xa_limit_32b, GFP_KERNEL);
+ if (err) {
+ i915_vm_put(vm);
+ return err;
+ }
+
+ GEM_BUG_ON(id == 0); /* reserved for invalid/unassigned ppgtt */
+ args->value = id;
+ args->size = 0;
+
+ return err;
+}
+
+int
+i915_gem_user_to_context_sseu(struct intel_gt *gt,
+ const struct drm_i915_gem_context_param_sseu *user,
+ struct intel_sseu *context)
+{
+ const struct sseu_dev_info *device = &gt->info.sseu;
+ struct drm_i915_private *i915 = gt->i915;
+ unsigned int dev_subslice_mask = intel_sseu_get_hsw_subslices(device, 0);
+
+ /* No zeros in any field. */
+ if (!user->slice_mask || !user->subslice_mask ||
+ !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+ return -EINVAL;
+
+ /* Max > min. */
+ if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+ return -EINVAL;
+
+ /*
+ * Some future proofing on the types since the uAPI is wider than the
+ * current internal implementation.
+ */
+ if (overflows_type(user->slice_mask, context->slice_mask) ||
+ overflows_type(user->subslice_mask, context->subslice_mask) ||
+ overflows_type(user->min_eus_per_subslice,
+ context->min_eus_per_subslice) ||
+ overflows_type(user->max_eus_per_subslice,
+ context->max_eus_per_subslice))
+ return -EINVAL;
+
+ /* Check validity against hardware. */
+ if (user->slice_mask & ~device->slice_mask)
+ return -EINVAL;
+
+ if (user->subslice_mask & ~dev_subslice_mask)
+ return -EINVAL;
+
+ if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+ return -EINVAL;
+
+ context->slice_mask = user->slice_mask;
+ context->subslice_mask = user->subslice_mask;
+ context->min_eus_per_subslice = user->min_eus_per_subslice;
+ context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+ /* Part specific restrictions. */
+ if (GRAPHICS_VER(i915) == 11) {
+ unsigned int hw_s = hweight8(device->slice_mask);
+ unsigned int hw_ss_per_s = hweight8(dev_subslice_mask);
+ unsigned int req_s = hweight8(context->slice_mask);
+ unsigned int req_ss = hweight8(context->subslice_mask);
+
+ /*
+ * Only full subslice enablement is possible if more than one
+ * slice is turned on.
+ */
+ if (req_s > 1 && req_ss != hw_ss_per_s)
+ return -EINVAL;
+
+ /*
+ * If more than four (SScount bitfield limit) subslices are
+ * requested then the number has to be even.
+ */
+ if (req_ss > 4 && (req_ss & 1))
+ return -EINVAL;
+
+ /*
+ * If only one slice is enabled and the subslice count is below the
+ * device's full enablement, it must be at most half of all the
+ * available subslices.
+ */
+ if (req_s == 1 && req_ss < hw_ss_per_s &&
+ req_ss > (hw_ss_per_s / 2))
+ return -EINVAL;
+
+ /* ABI restriction - VME use case only. */
+
+ /* All slices or one slice only. */
+ if (req_s != 1 && req_s != hw_s)
+ return -EINVAL;
+
+ /*
+ * Half subslices or full enablement only when one slice is
+ * enabled.
+ */
+ if (req_s == 1 &&
+ (req_ss != hw_ss_per_s && req_ss != (hw_ss_per_s / 2)))
+ return -EINVAL;
+
+ /* No EU configuration changes. */
+ if ((user->min_eus_per_subslice !=
+ device->max_eus_per_subslice) ||
+ (user->max_eus_per_subslice !=
+ device->max_eus_per_subslice))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
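+/*
+ * Worked example of the GRAPHICS_VER == 11 rules above, for a hypothetical
+ * part with one slice, eight subslices per slice and max_eus_per_subslice
+ * of eight:
+ *
+ *   - slice_mask 0x1, subslice_mask 0xff, min/max EUs 8: accepted
+ *     (full enablement).
+ *   - slice_mask 0x1, subslice_mask 0x0f, min/max EUs 8: accepted
+ *     (half of the subslices, the VME use case).
+ *   - slice_mask 0x1, subslice_mask 0x3f (six subslices): rejected, as a
+ *     partial enablement may use at most half of the available subslices.
+ *   - any request with min/max EUs below 8: rejected, since EU
+ *     configuration changes are not allowed.
+ */
+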
+static int set_sseu(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_private *i915 = ctx->i915;
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_context *ce;
+ struct intel_sseu sseu;
+ unsigned long lookup;
+ int ret;
+
+ if (args->size < sizeof(user_sseu))
+ return -EINVAL;
+
+ if (GRAPHICS_VER(i915) != 11)
+ return -ENODEV;
+
+ if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+ if (user_sseu.rsvd)
+ return -EINVAL;
+
+ if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+ return -EINVAL;
+
+ lookup = 0;
+ if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+ lookup |= LOOKUP_USER_INDEX;
+
+ ce = lookup_user_engine(ctx, lookup, &user_sseu.engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ /* Only render engine supports RPCS configuration. */
+ if (ce->engine->class != RENDER_CLASS) {
+ ret = -ENODEV;
+ goto out_ce;
+ }
+
+ ret = i915_gem_user_to_context_sseu(ce->engine->gt, &user_sseu, &sseu);
+ if (ret)
+ goto out_ce;
+
+ ret = intel_context_reconfigure_sseu(ce, sseu);
+ if (ret)
+ goto out_ce;
+
+ args->size = sizeof(user_sseu);
+
+out_ce:
+ intel_context_put(ce);
+ return ret;
+}
+
+static int
+set_persistence(struct i915_gem_context *ctx,
+ const struct drm_i915_gem_context_param *args)
+{
+ if (args->size)
+ return -EINVAL;
+
+ return __context_set_persistence(ctx, args->value);
+}
+
+static int set_priority(struct i915_gem_context *ctx,
+ const struct drm_i915_gem_context_param *args)
+{
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+ int err;
+
+ err = validate_priority(ctx->i915, args);
+ if (err)
+ return err;
+
+ ctx->sched.priority = args->value;
+
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ if (!intel_engine_has_timeslices(ce->engine))
+ continue;
+
+ if (ctx->sched.priority >= I915_PRIORITY_NORMAL &&
+ intel_engine_has_semaphores(ce->engine))
+ intel_context_set_use_semaphores(ce);
+ else
+ intel_context_clear_use_semaphores(ce);
+ }
+ i915_gem_context_unlock_engines(ctx);
+
+ return 0;
+}
+
+static int get_protected(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ args->size = 0;
+ args->value = i915_gem_context_uses_protected_content(ctx);
+
+ return 0;
+}
+
+static int ctx_setparam(struct drm_i915_file_private *fpriv,
+ struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ int ret = 0;
+
+ switch (args->param) {
+ case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (args->value)
+ i915_gem_context_set_no_error_capture(ctx);
+ else
+ i915_gem_context_clear_no_error_capture(ctx);
+ break;
+
+ case I915_CONTEXT_PARAM_BANNABLE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (!capable(CAP_SYS_ADMIN) && !args->value)
+ ret = -EPERM;
+ else if (args->value)
+ i915_gem_context_set_bannable(ctx);
+ else if (i915_gem_context_uses_protected_content(ctx))
+ ret = -EPERM; /* can't clear this for protected contexts */
+ else
+ i915_gem_context_clear_bannable(ctx);
+ break;
+
+ case I915_CONTEXT_PARAM_RECOVERABLE:
+ if (args->size)
+ ret = -EINVAL;
+ else if (!args->value)
+ i915_gem_context_clear_recoverable(ctx);
+ else if (i915_gem_context_uses_protected_content(ctx))
+ ret = -EPERM; /* can't set this for protected contexts */
+ else
+ i915_gem_context_set_recoverable(ctx);
+ break;
+
+ case I915_CONTEXT_PARAM_PRIORITY:
+ ret = set_priority(ctx, args);
+ break;
+
+ case I915_CONTEXT_PARAM_SSEU:
+ ret = set_sseu(ctx, args);
+ break;
+
+ case I915_CONTEXT_PARAM_PERSISTENCE:
+ ret = set_persistence(ctx, args);
+ break;
+
+ case I915_CONTEXT_PARAM_PROTECTED_CONTENT:
+ case I915_CONTEXT_PARAM_NO_ZEROMAP:
+ case I915_CONTEXT_PARAM_BAN_PERIOD:
+ case I915_CONTEXT_PARAM_RINGSIZE:
+ case I915_CONTEXT_PARAM_VM:
+ case I915_CONTEXT_PARAM_ENGINES:
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+struct create_ext {
+ struct i915_gem_proto_context *pc;
+ struct drm_i915_file_private *fpriv;
+};
+
+static int create_setparam(struct i915_user_extension __user *ext, void *data)
+{
+ struct drm_i915_gem_context_create_ext_setparam local;
+ const struct create_ext *arg = data;
+
+ if (copy_from_user(&local, ext, sizeof(local)))
+ return -EFAULT;
+
+ if (local.param.ctx_id)
+ return -EINVAL;
+
+ return set_proto_ctx_param(arg->fpriv, arg->pc, &local.param);
+}
+
+static int invalid_ext(struct i915_user_extension __user *ext, void *data)
+{
+ return -EINVAL;
+}
+
+static const i915_user_extension_fn create_extensions[] = {
+ [I915_CONTEXT_CREATE_EXT_SETPARAM] = create_setparam,
+ [I915_CONTEXT_CREATE_EXT_CLONE] = invalid_ext,
+};
+
+static bool client_is_banned(struct drm_i915_file_private *file_priv)
+{
+ return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
+}
+
+static inline struct i915_gem_context *
+__context_lookup(struct drm_i915_file_private *file_priv, u32 id)
+{
+ struct i915_gem_context *ctx;
+
+ rcu_read_lock();
+ ctx = xa_load(&file_priv->context_xa, id);
+ if (ctx && !kref_get_unless_zero(&ctx->ref))
+ ctx = NULL;
+ rcu_read_unlock();
+
+ return ctx;
+}
+
+static struct i915_gem_context *
+finalize_create_context_locked(struct drm_i915_file_private *file_priv,
+ struct i915_gem_proto_context *pc, u32 id)
+{
+ struct i915_gem_context *ctx;
+ void *old;
+
+ lockdep_assert_held(&file_priv->proto_context_lock);
+
+ ctx = i915_gem_create_context(file_priv->i915, pc);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ /*
+ * One for the xarray and one for the caller. We need to grab
+ * the reference *prior* to making the ctx visible to userspace
+ * in gem_context_register(), as at any point after that
+ * userspace can try to race us with another thread destroying
+ * the context under our feet.
+ */
+ i915_gem_context_get(ctx);
+
+ gem_context_register(ctx, file_priv, id);
+
+ old = xa_erase(&file_priv->proto_context_xa, id);
+ GEM_BUG_ON(old != pc);
+ proto_context_close(file_priv->i915, pc);
+
+ return ctx;
+}
+
+struct i915_gem_context *
+i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id)
+{
+ struct i915_gem_proto_context *pc;
+ struct i915_gem_context *ctx;
+
+ ctx = __context_lookup(file_priv, id);
+ if (ctx)
+ return ctx;
+
+ mutex_lock(&file_priv->proto_context_lock);
+ /* Try one more time under the lock */
+ ctx = __context_lookup(file_priv, id);
+ if (!ctx) {
+ pc = xa_load(&file_priv->proto_context_xa, id);
+ if (!pc)
+ ctx = ERR_PTR(-ENOENT);
+ else
+ ctx = finalize_create_context_locked(file_priv, pc, id);
+ }
+ mutex_unlock(&file_priv->proto_context_lock);
+
+ return ctx;
+}
+
+int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_context_create_ext *args = data;
+ struct create_ext ext_data;
+ int ret;
+ u32 id;
+
+ if (!DRIVER_CAPS(i915)->has_logical_contexts)
+ return -ENODEV;
+
+ if (args->flags & I915_CONTEXT_CREATE_FLAGS_UNKNOWN)
+ return -EINVAL;
+
+ ret = intel_gt_terminally_wedged(to_gt(i915));
+ if (ret)
+ return ret;
+
+ ext_data.fpriv = file->driver_priv;
+ if (client_is_banned(ext_data.fpriv)) {
+ drm_dbg(&i915->drm,
+ "client %s[%d] banned from creating ctx\n",
+ current->comm, task_pid_nr(current));
+ return -EIO;
+ }
+
+ ext_data.pc = proto_context_create(i915, args->flags);
+ if (IS_ERR(ext_data.pc))
+ return PTR_ERR(ext_data.pc);
+
+ if (args->flags & I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS) {
+ ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
+ create_extensions,
+ ARRAY_SIZE(create_extensions),
+ &ext_data);
+ if (ret)
+ goto err_pc;
+ }
+
+ if (GRAPHICS_VER(i915) > 12) {
+ struct i915_gem_context *ctx;
+
+ /* Get ourselves a context ID */
+ ret = xa_alloc(&ext_data.fpriv->context_xa, &id, NULL,
+ xa_limit_32b, GFP_KERNEL);
+ if (ret)
+ goto err_pc;
+
+ ctx = i915_gem_create_context(i915, ext_data.pc);
+ if (IS_ERR(ctx)) {
+ ret = PTR_ERR(ctx);
+ goto err_pc;
+ }
+
+ proto_context_close(i915, ext_data.pc);
+ gem_context_register(ctx, ext_data.fpriv, id);
+ } else {
+ ret = proto_context_register(ext_data.fpriv, ext_data.pc, &id);
+ if (ret < 0)
+ goto err_pc;
+ }
+
+ args->ctx_id = id;
+
+ return 0;
+
+err_pc:
+ proto_context_close(i915, ext_data.pc);
+ return ret;
+}
+
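+/*
+ * Example: a minimal userspace sketch of GEM_CONTEXT_CREATE with a chained
+ * SETPARAM extension, the path handled by create_setparam() above.  The
+ * priority value is illustrative and error handling is elided; note that
+ * param.ctx_id must be left zero.
+ *
+ *	struct drm_i915_gem_context_create_ext_setparam ext = {
+ *		.base = { .name = I915_CONTEXT_CREATE_EXT_SETPARAM },
+ *		.param = {
+ *			.param = I915_CONTEXT_PARAM_PRIORITY,
+ *			.value = -512,
+ *		},
+ *	};
+ *	struct drm_i915_gem_context_create_ext create = {
+ *		.flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS,
+ *		.extensions = (uintptr_t)&ext,
+ *	};
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE_EXT, &create);
+ *	// create.ctx_id now holds the new context handle
+ */
+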
+int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_context_destroy *args = data;
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct i915_gem_proto_context *pc;
+ struct i915_gem_context *ctx;
+
+ if (args->pad != 0)
+ return -EINVAL;
+
+ if (!args->ctx_id)
+ return -ENOENT;
+
+ /*
+ * We need to hold the proto-context lock here to prevent races
+ * with finalize_create_context_locked().
+ */
+ mutex_lock(&file_priv->proto_context_lock);
+ ctx = xa_erase(&file_priv->context_xa, args->ctx_id);
+ pc = xa_erase(&file_priv->proto_context_xa, args->ctx_id);
+ mutex_unlock(&file_priv->proto_context_lock);
+
+ if (!ctx && !pc)
+ return -ENOENT;
+ GEM_WARN_ON(ctx && pc);
+
+ if (pc)
+ proto_context_close(file_priv->i915, pc);
+
+ if (ctx)
+ context_close(ctx);
+
+ return 0;
+}
+
+static int get_sseu(struct i915_gem_context *ctx,
+ struct drm_i915_gem_context_param *args)
+{
+ struct drm_i915_gem_context_param_sseu user_sseu;
+ struct intel_context *ce;
+ unsigned long lookup;
+ int err;
+
+ if (args->size == 0)
+ goto out;
+ else if (args->size < sizeof(user_sseu))
+ return -EINVAL;
+
+ if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+ if (user_sseu.rsvd)
+ return -EINVAL;
+
+ if (user_sseu.flags & ~(I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX))
+ return -EINVAL;
+
+ lookup = 0;
+ if (user_sseu.flags & I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX)
+ lookup |= LOOKUP_USER_INDEX;
+
+ ce = lookup_user_engine(ctx, lookup, &user_sseu.engine);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ err = intel_context_lock_pinned(ce); /* serialises with set_sseu */
+ if (err) {
+ intel_context_put(ce);
+ return err;
+ }
+
+ user_sseu.slice_mask = ce->sseu.slice_mask;
+ user_sseu.subslice_mask = ce->sseu.subslice_mask;
+ user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+ user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+ intel_context_unlock_pinned(ce);
+ intel_context_put(ce);
+
+ if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+ sizeof(user_sseu)))
+ return -EFAULT;
+
+out:
+ args->size = sizeof(user_sseu);
+
+ return 0;
+}
+
+int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_gem_context_param *args = data;
+ struct i915_gem_context *ctx;
+ struct i915_address_space *vm;
+ int ret = 0;
+
+ ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ switch (args->param) {
+ case I915_CONTEXT_PARAM_GTT_SIZE:
+ args->size = 0;
+ vm = i915_gem_context_get_eb_vm(ctx);
+ args->value = vm->total;
+ i915_vm_put(vm);
+
+ break;
+
+ case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
+ args->size = 0;
+ args->value = i915_gem_context_no_error_capture(ctx);
+ break;
+
+ case I915_CONTEXT_PARAM_BANNABLE:
+ args->size = 0;
+ args->value = i915_gem_context_is_bannable(ctx);
+ break;
+
+ case I915_CONTEXT_PARAM_RECOVERABLE:
+ args->size = 0;
+ args->value = i915_gem_context_is_recoverable(ctx);
+ break;
+
+ case I915_CONTEXT_PARAM_PRIORITY:
+ args->size = 0;
+ args->value = ctx->sched.priority;
+ break;
+
+ case I915_CONTEXT_PARAM_SSEU:
+ ret = get_sseu(ctx, args);
+ break;
+
+ case I915_CONTEXT_PARAM_VM:
+ ret = get_ppgtt(file_priv, ctx, args);
+ break;
+
+ case I915_CONTEXT_PARAM_PERSISTENCE:
+ args->size = 0;
+ args->value = i915_gem_context_is_persistent(ctx);
+ break;
+
+ case I915_CONTEXT_PARAM_PROTECTED_CONTENT:
+ ret = get_protected(ctx, args);
+ break;
+
+ case I915_CONTEXT_PARAM_NO_ZEROMAP:
+ case I915_CONTEXT_PARAM_BAN_PERIOD:
+ case I915_CONTEXT_PARAM_ENGINES:
+ case I915_CONTEXT_PARAM_RINGSIZE:
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ i915_gem_context_put(ctx);
+ return ret;
+}
+
+int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_gem_context_param *args = data;
+ struct i915_gem_proto_context *pc;
+ struct i915_gem_context *ctx;
+ int ret = 0;
+
+ mutex_lock(&file_priv->proto_context_lock);
+ ctx = __context_lookup(file_priv, args->ctx_id);
+ if (!ctx) {
+ pc = xa_load(&file_priv->proto_context_xa, args->ctx_id);
+ if (pc) {
+ /* Contexts should be finalized inside
+ * GEM_CONTEXT_CREATE starting with graphics
+ * version 13.
+ */
+ WARN_ON(GRAPHICS_VER(file_priv->i915) > 12);
+ ret = set_proto_ctx_param(file_priv, pc, args);
+ } else {
+ ret = -ENOENT;
+ }
+ }
+ mutex_unlock(&file_priv->proto_context_lock);
+
+ if (ctx) {
+ ret = ctx_setparam(file_priv, ctx, args);
+ i915_gem_context_put(ctx);
+ }
+
+ return ret;
+}
+
+int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
+ void *data, struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_reset_stats *args = data;
+ struct i915_gem_context *ctx;
+
+ if (args->flags || args->pad)
+ return -EINVAL;
+
+ ctx = i915_gem_context_lookup(file->driver_priv, args->ctx_id);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ /*
+ * We opt for unserialised reads here. This may result in tearing
+ * in the extremely unlikely event of a GPU hang on this context
+ * as we are querying them. If we need that extra layer of protection,
+ * we should wrap the hangstats with a seqlock.
+ */
+
+ if (capable(CAP_SYS_ADMIN))
+ args->reset_count = i915_reset_count(&i915->gpu_error);
+ else
+ args->reset_count = 0;
+
+ args->batch_active = atomic_read(&ctx->guilty_count);
+ args->batch_pending = atomic_read(&ctx->active_count);
+
+ i915_gem_context_put(ctx);
+ return 0;
+}
+
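+/*
+ * Example: a minimal userspace sketch of reading the stats returned above
+ * (error handling elided):
+ *
+ *	struct drm_i915_reset_stats stats = { .ctx_id = ctx_id };
+ *
+ *	ioctl(fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats);
+ *	// stats.batch_active: hangs this context caused
+ *	// stats.batch_pending: hangs this context was caught up in
+ */
+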
+/* GEM context-engines iterator: for_each_gem_engine() */
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
+{
+ const struct i915_gem_engines *e = it->engines;
+ struct intel_context *ctx;
+
+ if (unlikely(!e))
+ return NULL;
+
+ do {
+ if (it->idx >= e->num_engines)
+ return NULL;
+
+ ctx = e->engines[it->idx++];
+ } while (!ctx);
+
+ return ctx;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_context.c"
+#include "selftests/i915_gem_context.c"
+#endif
+
+void i915_gem_context_module_exit(void)
+{
+ kmem_cache_destroy(slab_luts);
+}
+
+int __init i915_gem_context_module_init(void)
+{
+ slab_luts = KMEM_CACHE(i915_lut_handle, 0);
+ if (!slab_luts)
+ return -ENOMEM;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h
new file mode 100644
index 0000000000..e5b0f66ea1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h
@@ -0,0 +1,248 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CONTEXT_H__
+#define __I915_GEM_CONTEXT_H__
+
+#include "i915_gem_context_types.h"
+
+#include "gt/intel_context.h"
+
+#include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_scheduler.h"
+#include "intel_device_info.h"
+
+struct drm_device;
+struct drm_file;
+
+static inline bool i915_gem_context_is_closed(const struct i915_gem_context *ctx)
+{
+ return test_bit(CONTEXT_CLOSED, &ctx->flags);
+}
+
+static inline void i915_gem_context_set_closed(struct i915_gem_context *ctx)
+{
+ GEM_BUG_ON(i915_gem_context_is_closed(ctx));
+ set_bit(CONTEXT_CLOSED, &ctx->flags);
+}
+
+static inline bool i915_gem_context_no_error_capture(const struct i915_gem_context *ctx)
+{
+ return test_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_no_error_capture(struct i915_gem_context *ctx)
+{
+ set_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_no_error_capture(struct i915_gem_context *ctx)
+{
+ clear_bit(UCONTEXT_NO_ERROR_CAPTURE, &ctx->user_flags);
+}
+
+static inline bool i915_gem_context_is_bannable(const struct i915_gem_context *ctx)
+{
+ return test_bit(UCONTEXT_BANNABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_bannable(struct i915_gem_context *ctx)
+{
+ set_bit(UCONTEXT_BANNABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_bannable(struct i915_gem_context *ctx)
+{
+ clear_bit(UCONTEXT_BANNABLE, &ctx->user_flags);
+}
+
+static inline bool i915_gem_context_is_recoverable(const struct i915_gem_context *ctx)
+{
+ return test_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_recoverable(struct i915_gem_context *ctx)
+{
+ set_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_recoverable(struct i915_gem_context *ctx)
+{
+ clear_bit(UCONTEXT_RECOVERABLE, &ctx->user_flags);
+}
+
+static inline bool i915_gem_context_is_persistent(const struct i915_gem_context *ctx)
+{
+ return test_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_set_persistence(struct i915_gem_context *ctx)
+{
+ set_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline void i915_gem_context_clear_persistence(struct i915_gem_context *ctx)
+{
+ clear_bit(UCONTEXT_PERSISTENCE, &ctx->user_flags);
+}
+
+static inline bool
+i915_gem_context_user_engines(const struct i915_gem_context *ctx)
+{
+ return test_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_set_user_engines(struct i915_gem_context *ctx)
+{
+ set_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline void
+i915_gem_context_clear_user_engines(struct i915_gem_context *ctx)
+{
+ clear_bit(CONTEXT_USER_ENGINES, &ctx->flags);
+}
+
+static inline bool
+i915_gem_context_uses_protected_content(const struct i915_gem_context *ctx)
+{
+ return ctx->uses_protected_content;
+}
+
+/* i915_gem_context.c */
+void i915_gem_init__contexts(struct drm_i915_private *i915);
+
+int i915_gem_context_open(struct drm_i915_private *i915,
+ struct drm_file *file);
+void i915_gem_context_close(struct drm_file *file);
+
+void i915_gem_context_release(struct kref *ctx_ref);
+
+int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_vm_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv);
+int i915_gem_context_reset_stats_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+struct i915_gem_context *
+i915_gem_context_lookup(struct drm_i915_file_private *file_priv, u32 id);
+
+static inline struct i915_gem_context *
+i915_gem_context_get(struct i915_gem_context *ctx)
+{
+ kref_get(&ctx->ref);
+ return ctx;
+}
+
+static inline void i915_gem_context_put(struct i915_gem_context *ctx)
+{
+ kref_put(&ctx->ref, i915_gem_context_release);
+}
+
+static inline struct i915_address_space *
+i915_gem_context_vm(struct i915_gem_context *ctx)
+{
+ return rcu_dereference_protected(ctx->vm, lockdep_is_held(&ctx->mutex));
+}
+
+static inline bool i915_gem_context_has_full_ppgtt(struct i915_gem_context *ctx)
+{
+ GEM_BUG_ON(!!ctx->vm != HAS_FULL_PPGTT(ctx->i915));
+
+ return !!ctx->vm;
+}
+
+static inline struct i915_address_space *
+i915_gem_context_get_eb_vm(struct i915_gem_context *ctx)
+{
+ struct i915_address_space *vm;
+
+ vm = ctx->vm;
+ if (!vm)
+ vm = &to_gt(ctx->i915)->ggtt->vm;
+ vm = i915_vm_get(vm);
+
+ return vm;
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_engines(struct i915_gem_context *ctx)
+{
+ return rcu_dereference_protected(ctx->engines,
+ lockdep_is_held(&ctx->engines_mutex));
+}
+
+static inline struct i915_gem_engines *
+i915_gem_context_lock_engines(struct i915_gem_context *ctx)
+ __acquires(&ctx->engines_mutex)
+{
+ mutex_lock(&ctx->engines_mutex);
+ return i915_gem_context_engines(ctx);
+}
+
+static inline void
+i915_gem_context_unlock_engines(struct i915_gem_context *ctx)
+ __releases(&ctx->engines_mutex)
+{
+ mutex_unlock(&ctx->engines_mutex);
+}
+
+static inline struct intel_context *
+i915_gem_context_get_engine(struct i915_gem_context *ctx, unsigned int idx)
+{
+ struct intel_context *ce;
+
+ rcu_read_lock(); {
+ struct i915_gem_engines *e = rcu_dereference(ctx->engines);
+ if (unlikely(!e)) /* context was closed! */
+ ce = ERR_PTR(-ENOENT);
+ else if (likely(idx < e->num_engines && e->engines[idx]))
+ ce = intel_context_get(e->engines[idx]);
+ else
+ ce = ERR_PTR(-EINVAL);
+ } rcu_read_unlock();
+
+ return ce;
+}
+
+static inline void
+i915_gem_engines_iter_init(struct i915_gem_engines_iter *it,
+ struct i915_gem_engines *engines)
+{
+ it->engines = engines;
+ it->idx = 0;
+}
+
+struct intel_context *
+i915_gem_engines_iter_next(struct i915_gem_engines_iter *it);
+
+#define for_each_gem_engine(ce, engines, it) \
+ for (i915_gem_engines_iter_init(&(it), (engines)); \
+ ((ce) = i915_gem_engines_iter_next(&(it)));)
+
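+/*
+ * Example usage of the iterator, mirroring set_priority() in
+ * i915_gem_context.c (sketch only; the lock/unlock pairing is required):
+ *
+ *	struct i915_gem_engines_iter it;
+ *	struct intel_context *ce;
+ *
+ *	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ *		// inspect or adjust each ce
+ *	}
+ *	i915_gem_context_unlock_engines(ctx);
+ */
+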
+void i915_gem_context_module_exit(void);
+int i915_gem_context_module_init(void);
+
+struct i915_lut_handle *i915_lut_handle_alloc(void);
+void i915_lut_handle_free(struct i915_lut_handle *lut);
+
+int i915_gem_user_to_context_sseu(struct intel_gt *gt,
+ const struct drm_i915_gem_context_param_sseu *user,
+ struct intel_sseu *context);
+
+#endif /* !__I915_GEM_CONTEXT_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
new file mode 100644
index 0000000000..cb78214a7d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h
@@ -0,0 +1,419 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CONTEXT_TYPES_H__
+#define __I915_GEM_CONTEXT_TYPES_H__
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/llist.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
+#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+
+#include "gt/intel_context_types.h"
+
+#include "i915_scheduler.h"
+#include "i915_sw_fence.h"
+
+struct pid;
+
+struct drm_i915_private;
+struct drm_i915_file_private;
+struct i915_address_space;
+struct intel_timeline;
+struct intel_ring;
+
+/**
+ * struct i915_gem_engines - A set of engines
+ */
+struct i915_gem_engines {
+ union {
+ /** @link: Link in i915_gem_context::stale::engines */
+ struct list_head link;
+
+ /** @rcu: RCU to use when freeing */
+ struct rcu_head rcu;
+ };
+
+ /** @fence: Fence used for delayed destruction of engines */
+ struct i915_sw_fence fence;
+
+ /** @ctx: i915_gem_context backpointer */
+ struct i915_gem_context *ctx;
+
+ /** @num_engines: Number of engines in this set */
+ unsigned int num_engines;
+
+ /** @engines: Array of engines */
+ struct intel_context *engines[];
+};
+
+/**
+ * struct i915_gem_engines_iter - Iterator for an i915_gem_engines set
+ */
+struct i915_gem_engines_iter {
+ /** @idx: Index into i915_gem_engines::engines */
+ unsigned int idx;
+
+ /** @engines: Engine set being iterated */
+ const struct i915_gem_engines *engines;
+};
+
+/**
+ * enum i915_gem_engine_type - Describes the type of an i915_gem_proto_engine
+ */
+enum i915_gem_engine_type {
+ /** @I915_GEM_ENGINE_TYPE_INVALID: An invalid engine */
+ I915_GEM_ENGINE_TYPE_INVALID = 0,
+
+ /** @I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine */
+ I915_GEM_ENGINE_TYPE_PHYSICAL,
+
+ /** @I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set */
+ I915_GEM_ENGINE_TYPE_BALANCED,
+
+ /** @I915_GEM_ENGINE_TYPE_PARALLEL: A parallel engine set */
+ I915_GEM_ENGINE_TYPE_PARALLEL,
+};
+
+/**
+ * struct i915_gem_proto_engine - prototype engine
+ *
+ * This struct describes an engine that a context may contain. Engines
+ * have four types:
+ *
+ * - I915_GEM_ENGINE_TYPE_INVALID: Invalid engines can be created but they
+ * show up as a NULL in i915_gem_engines::engines[i] and any attempt by
+ * the user to use them results in -EINVAL. They are also useful during
+ * proto-context construction because the client may create invalid
+ * engines and then set them up later as virtual engines.
+ *
+ * - I915_GEM_ENGINE_TYPE_PHYSICAL: A single physical engine, described by
+ * i915_gem_proto_engine::engine.
+ *
+ * - I915_GEM_ENGINE_TYPE_BALANCED: A load-balanced engine set, described by
+ * i915_gem_proto_engine::num_siblings and i915_gem_proto_engine::siblings.
+ *
+ * - I915_GEM_ENGINE_TYPE_PARALLEL: A parallel submission engine set, described
+ * by i915_gem_proto_engine::width, i915_gem_proto_engine::num_siblings, and
+ * i915_gem_proto_engine::siblings.
+ */
+struct i915_gem_proto_engine {
+ /** @type: Type of this engine */
+ enum i915_gem_engine_type type;
+
+ /** @engine: Engine, for physical */
+ struct intel_engine_cs *engine;
+
+ /** @num_siblings: Number of balanced or parallel siblings */
+ unsigned int num_siblings;
+
+ /** @width: Width of each sibling */
+ unsigned int width;
+
+ /** @siblings: Balanced siblings or num_siblings * width for parallel */
+ struct intel_engine_cs **siblings;
+
+ /** @sseu: Client-set SSEU parameters */
+ struct intel_sseu sseu;
+};
+
+/**
+ * struct i915_gem_proto_context - prototype context
+ *
+ * The struct i915_gem_proto_context represents the creation parameters for
+ * a struct i915_gem_context. This is used to gather parameters provided
+ * either through creation flags or via SET_CONTEXT_PARAM so that, when we
+ * create the final i915_gem_context, those parameters can be immutable.
+ *
+ * The context uAPI allows for two methods of setting context parameters:
+ * SET_CONTEXT_PARAM and CONTEXT_CREATE_EXT_SETPARAM. The former is
+ * allowed to be called at any time while the latter happens as part of
+ * GEM_CONTEXT_CREATE. When these were initially added, everything settable
+ * via one was settable via the other. While some params are fairly simple
+ * and setting them on a live context is harmless, such as the context
+ * priority, others are far trickier, such as the VM or the
+ * set of engines. To avoid some truly nasty race conditions, we don't
+ * allow setting the VM or the set of engines on live contexts.
+ *
+ * The way we dealt with this without breaking older userspace that sets
+ * the VM or engine set via SET_CONTEXT_PARAM is to delay the creation of
+ * the actual context until after the client is done configuring it with
+ * SET_CONTEXT_PARAM. From the perspective of the client, it has the same
+ * u32 context ID the whole time. From the perspective of i915, however,
+ * it's an i915_gem_proto_context right up until the point where we attempt
+ * to do something which the proto-context can't handle at which point the
+ * real context gets created.
+ *
+ * This is accomplished via a little xarray dance. When GEM_CONTEXT_CREATE
+ * is called, we create a proto-context, reserve a slot in context_xa but
+ * leave it NULL, and put the proto-context in the corresponding slot in
+ * proto_context_xa. Then, whenever we go to look up a context, we first
+ * check context_xa. If it's there, we return the i915_gem_context and
+ * we're done. If it's not, we look in proto_context_xa and, if we find it
+ * there, we create the actual context and kill the proto-context.
+ *
+ * At the time we made this change (April, 2021), we did a fairly complete
+ * audit of existing userspace to ensure this wouldn't break anything:
+ *
+ * - Mesa/i965 didn't use the engines or VM APIs at all
+ *
+ * - Mesa/ANV used the engines API but via CONTEXT_CREATE_EXT_SETPARAM and
+ * didn't use the VM API.
+ *
+ * - Mesa/iris didn't use the engines or VM APIs at all
+ *
+ * - The open-source compute-runtime didn't yet use the engines API but
+ * did use the VM API via SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM
+ * was always the second ioctl on that context, immediately following
+ * GEM_CONTEXT_CREATE.
+ *
+ * - The media driver sets engines and bonding/balancing via
+ * SET_CONTEXT_PARAM. However, CONTEXT_SETPARAM to set the VM was
+ * always the second ioctl on that context, immediately following
+ * GEM_CONTEXT_CREATE and setting engines immediately followed that.
+ *
+ * In order for this dance to work properly, any modification to an
+ * i915_gem_proto_context that is exposed to the client via
+ * drm_i915_file_private::proto_context_xa must be guarded by
+ * drm_i915_file_private::proto_context_lock. The exception is when a
+ * proto-context has not yet been exposed such as when handling
+ * CONTEXT_CREATE_SET_PARAM during GEM_CONTEXT_CREATE.
+ */
+struct i915_gem_proto_context {
+ /** @vm: See &i915_gem_context.vm */
+ struct i915_address_space *vm;
+
+ /** @user_flags: See &i915_gem_context.user_flags */
+ unsigned long user_flags;
+
+ /** @sched: See &i915_gem_context.sched */
+ struct i915_sched_attr sched;
+
+ /** @num_user_engines: Number of user-specified engines or -1 */
+ int num_user_engines;
+
+ /** @user_engines: User-specified engines */
+ struct i915_gem_proto_engine *user_engines;
+
+ /** @legacy_rcs_sseu: Client-set SSEU parameters for the legacy RCS */
+ struct intel_sseu legacy_rcs_sseu;
+
+ /** @single_timeline: See &i915_gem_context.syncobj */
+ bool single_timeline;
+
+ /** @uses_protected_content: See &i915_gem_context.uses_protected_content */
+ bool uses_protected_content;
+
+ /** @pxp_wakeref: See &i915_gem_context.pxp_wakeref */
+ intel_wakeref_t pxp_wakeref;
+};
+
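+/*
+ * In shorthand, the xarray dance described above looks roughly like this
+ * (sketch only; locking, reference counting and error handling elided):
+ *
+ *	GEM_CONTEXT_CREATE (graphics version <= 12):
+ *		pc = proto_context_create(...);
+ *		// reserve a NULL slot for id in context_xa and store pc in
+ *		// the matching slot of proto_context_xa
+ *
+ *	first lookup of id:
+ *		ctx = xa_load(&fpriv->context_xa, id);		// still NULL
+ *		pc = xa_load(&fpriv->proto_context_xa, id);
+ *		ctx = i915_gem_create_context(i915, pc);	// finalize
+ *		// ctx now lives in context_xa and pc is destroyed
+ */
+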
+/**
+ * struct i915_gem_context - client state
+ *
+ * The struct i915_gem_context represents the combined view of the driver and
+ * logical hardware state for a particular client.
+ */
+struct i915_gem_context {
+ /** @i915: i915 device backpointer */
+ struct drm_i915_private *i915;
+
+ /** @file_priv: owning file descriptor */
+ struct drm_i915_file_private *file_priv;
+
+ /**
+ * @engines: User defined engines for this context
+ *
+ * Various uAPIs offer the ability to look up an
+ * index from this array to select an engine to operate on.
+ *
+ * Multiple logically distinct instances of the same engine
+ * may be defined in the array, as well as composite virtual
+ * engines.
+ *
+ * Execbuf uses the I915_EXEC_RING_MASK as an index into this
+ * array to select which HW context + engine to execute on. For
+ * the default array, the user_ring_map[] is used to translate
+ * the legacy uABI onto the appropriate index (e.g. both
+ * I915_EXEC_DEFAULT and I915_EXEC_RENDER select the same
+ * context, and I915_EXEC_BSD is weird). For a user defined
+ * array, execbuf uses I915_EXEC_RING_MASK as a plain index.
+ *
+ * User defined by I915_CONTEXT_PARAM_ENGINE (when the
+ * CONTEXT_USER_ENGINES flag is set).
+ */
+ struct i915_gem_engines __rcu *engines;
+
+ /** @engines_mutex: guards writes to engines */
+ struct mutex engines_mutex;
+
+ /**
+ * @syncobj: Shared timeline syncobj
+ *
+ * When the SHARED_TIMELINE flag is set on context creation, we
+ * emulate a single timeline across all engines using this syncobj.
+ * For every execbuffer2 call, this syncobj is used as both an in-
+ * and out-fence. Unlike the real intel_timeline, this doesn't
+ * provide perfect atomic in-order guarantees if the client races
+ * with itself by calling execbuffer2 twice concurrently. However,
+ * if userspace races with itself, that's not likely to yield well-
+ * defined results anyway so we choose to not care.
+ */
+ struct drm_syncobj *syncobj;
+
+ /**
+ * @vm: unique address space (GTT)
+ *
+ * In full-ppgtt mode, each context has its own address space ensuring
+ * complete separation of one client from all others.
+ *
+ * In other modes, this is a NULL pointer with the expectation that
+ * the caller uses the shared global GTT.
+ */
+ struct i915_address_space *vm;
+
+ /**
+ * @pid: process id of creator
+ *
+ * Note that whoever created the context may not be the principal user,
+ * as the context may be shared across a local socket. However,
+ * that should only affect the default context, all contexts created
+ * explicitly by the client are expected to be isolated.
+ */
+ struct pid *pid;
+
+ /** @link: link in &drm_i915_private.context_list */
+ struct list_head link;
+
+ /** @client: struct i915_drm_client */
+ struct i915_drm_client *client;
+
+ /** @client_link: for linking onto &i915_drm_client.ctx_list */
+ struct list_head client_link;
+
+ /**
+ * @ref: reference count
+ *
+ * A reference to a context is held by both the client who created it
+ * and on each request submitted to the hardware using the request
+ * (to ensure the hardware has access to the state until it has
+ * finished all pending writes). See i915_gem_context_get() and
+ * i915_gem_context_put() for access.
+ */
+ struct kref ref;
+
+ /**
+ * @release_work:
+ *
+ * Work item for deferred cleanup, since i915_gem_context_put() tends to
+ * be called from hardirq context.
+ *
+ * FIXME: The only real reason for this is &i915_gem_engines.fence; all
+ * other callers are from process context and need at most some mild
+ * shuffling to pull the i915_gem_context_put() call out of a spinlock.
+ */
+ struct work_struct release_work;
+
+ /**
+ * @rcu: rcu_head for deferred freeing.
+ */
+ struct rcu_head rcu;
+
+ /**
+ * @user_flags: small set of booleans controlled by the user
+ */
+ unsigned long user_flags;
+#define UCONTEXT_NO_ERROR_CAPTURE 1
+#define UCONTEXT_BANNABLE 2
+#define UCONTEXT_RECOVERABLE 3
+#define UCONTEXT_PERSISTENCE 4
+
+ /**
+ * @flags: small set of booleans
+ */
+ unsigned long flags;
+#define CONTEXT_CLOSED 0
+#define CONTEXT_USER_ENGINES 1
+
+ /**
+ * @uses_protected_content: context uses PXP-encrypted objects.
+ *
+ * This flag can only be set at ctx creation time and it's immutable for
+ * the lifetime of the context. See I915_CONTEXT_PARAM_PROTECTED_CONTENT
+ * in uapi/drm/i915_drm.h for more info on setting restrictions and
+ * expected behaviour of marked contexts.
+ */
+ bool uses_protected_content;
+
+ /**
+ * @pxp_wakeref: wakeref to keep the device awake when PXP is in use
+ *
+ * PXP sessions are invalidated when the device is suspended, which in
+ * turn invalidates all contexts and objects using it. To keep the
+ * flow simple, we keep the device awake when contexts using PXP objects
+ * are in use. It is expected that the userspace application only uses
+ * PXP when the display is on, so taking a wakeref here shouldn't worsen
+ * our power metrics.
+ */
+ intel_wakeref_t pxp_wakeref;
+
+ /** @mutex: guards everything that isn't engines or handles_vma */
+ struct mutex mutex;
+
+ /** @sched: scheduler parameters */
+ struct i915_sched_attr sched;
+
+ /** @guilty_count: How many times this context has caused a GPU hang. */
+ atomic_t guilty_count;
+ /**
+ * @active_count: How many times this context was active during a GPU
+ * hang, but did not cause it.
+ */
+ atomic_t active_count;
+
+ /**
+ * @hang_timestamp: The last time(s) this context caused a GPU hang
+ */
+ unsigned long hang_timestamp[2];
+#define CONTEXT_FAST_HANG_JIFFIES (120 * HZ) /* 3 hangs within 120s? Banned! */
+
+ /** @remap_slice: Bitmask of L3 slices that need remapping */
+ u8 remap_slice;
+
+ /**
+ * @handles_vma: rbtree to look up our context specific obj/vma for
+ * the user handle. (user handles are per fd, but the binding is
+ * per vm, which may be one per context or shared with the global GTT)
+ */
+ struct radix_tree_root handles_vma;
+
+ /** @lut_mutex: Locks handles_vma */
+ struct mutex lut_mutex;
+
+ /**
+ * @name: arbitrary name, used for user debug
+ *
+ * A name is constructed for the context from the creator's process
+ * name, pid and user handle in order to uniquely identify the
+ * context in messages.
+ */
+ char name[TASK_COMM_LEN + 8];
+
+ /** @stale: tracks stale engines to be destroyed */
+ struct {
+ /** @lock: guards engines */
+ spinlock_t lock;
+ /** @engines: list of stale engines */
+ struct list_head engines;
+ } stale;
+};
+
+#endif /* __I915_GEM_CONTEXT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.c b/drivers/gpu/drm/i915/gem/i915_gem_create.c
new file mode 100644
index 0000000000..d24c0ce880
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.c
@@ -0,0 +1,498 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020 Intel Corporation
+ */
+
+#include <drm/drm_fourcc.h>
+
+#include "display/intel_display.h"
+#include "gem/i915_gem_ioctls.h"
+#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_region.h"
+#include "pxp/intel_pxp.h"
+
+#include "i915_drv.h"
+#include "i915_gem_create.h"
+#include "i915_trace.h"
+#include "i915_user_extensions.h"
+
+static u32 object_max_page_size(struct intel_memory_region **placements,
+ unsigned int n_placements)
+{
+ u32 max_page_size = 0;
+ int i;
+
+ for (i = 0; i < n_placements; i++) {
+ struct intel_memory_region *mr = placements[i];
+
+ GEM_BUG_ON(!is_power_of_2(mr->min_page_size));
+ max_page_size = max_t(u32, max_page_size, mr->min_page_size);
+ }
+
+ GEM_BUG_ON(!max_page_size);
+ return max_page_size;
+}
+
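+/*
+ * For example, with a typical system memory region using 4K pages and a
+ * local memory region with a 64K minimum page size (sizes illustrative),
+ * an object that may be placed in either region has its size rounded up
+ * to a 64K multiple so the same backing size is valid in every placement.
+ */
+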
+static int object_set_placements(struct drm_i915_gem_object *obj,
+ struct intel_memory_region **placements,
+ unsigned int n_placements)
+{
+ struct intel_memory_region **arr;
+ unsigned int i;
+
+ GEM_BUG_ON(!n_placements);
+
+ /*
+ * For the common case of one memory region, skip storing an
+ * allocated array and just point at the region directly.
+ */
+ if (n_placements == 1) {
+ struct intel_memory_region *mr = placements[0];
+ struct drm_i915_private *i915 = mr->i915;
+
+ obj->mm.placements = &i915->mm.regions[mr->id];
+ obj->mm.n_placements = 1;
+ } else {
+ arr = kmalloc_array(n_placements,
+ sizeof(struct intel_memory_region *),
+ GFP_KERNEL);
+ if (!arr)
+ return -ENOMEM;
+
+ for (i = 0; i < n_placements; i++)
+ arr[i] = placements[i];
+
+ obj->mm.placements = arr;
+ obj->mm.n_placements = n_placements;
+ }
+
+ return 0;
+}
+
+static int i915_gem_publish(struct drm_i915_gem_object *obj,
+ struct drm_file *file,
+ u64 *size_p,
+ u32 *handle_p)
+{
+ u64 size = obj->base.size;
+ int ret;
+
+ ret = drm_gem_handle_create(file, &obj->base, handle_p);
+ /* drop reference from allocate - handle holds it now */
+ i915_gem_object_put(obj);
+ if (ret)
+ return ret;
+
+ *size_p = size;
+ return 0;
+}
+
+static struct drm_i915_gem_object *
+__i915_gem_object_create_user_ext(struct drm_i915_private *i915, u64 size,
+ struct intel_memory_region **placements,
+ unsigned int n_placements,
+ unsigned int ext_flags)
+{
+ struct intel_memory_region *mr = placements[0];
+ struct drm_i915_gem_object *obj;
+ unsigned int flags;
+ int ret;
+
+ i915_gem_flush_free_objects(i915);
+
+ size = round_up(size, object_max_page_size(placements, n_placements));
+ if (size == 0)
+ return ERR_PTR(-EINVAL);
+
+ /* For most of the ABI (e.g. mmap) we think in system pages */
+ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+ if (i915_gem_object_size_2big(size))
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ ret = object_set_placements(obj, placements, n_placements);
+ if (ret)
+ goto object_free;
+
+ /*
+ * I915_BO_ALLOC_USER will make sure the object is cleared before
+ * any user access.
+ */
+ flags = I915_BO_ALLOC_USER;
+
+ ret = mr->ops->init_object(mr, obj, I915_BO_INVALID_OFFSET, size, 0, flags);
+ if (ret)
+ goto object_free;
+
+ GEM_BUG_ON(size != obj->base.size);
+
+ /* Add any flag set by create_ext options */
+ obj->flags |= ext_flags;
+
+ trace_i915_gem_object_create(obj);
+ return obj;
+
+object_free:
+ if (obj->mm.n_placements > 1)
+ kfree(obj->mm.placements);
+ i915_gem_object_free(obj);
+ return ERR_PTR(ret);
+}
+
+/**
+ * __i915_gem_object_create_user - Creates a new object using the same path as
+ * DRM_I915_GEM_CREATE_EXT
+ * @i915: i915 private
+ * @size: size of the buffer, in bytes
+ * @placements: possible placement regions, in priority order
+ * @n_placements: number of possible placement regions
+ *
+ * This function is exposed primarily for selftests and does very little
+ * error checking. It is assumed that the set of placement regions has
+ * already been verified to be valid.
+ */
+struct drm_i915_gem_object *
+__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size,
+ struct intel_memory_region **placements,
+ unsigned int n_placements)
+{
+ return __i915_gem_object_create_user_ext(i915, size, placements,
+ n_placements, 0);
+}
+
+int
+i915_gem_dumb_create(struct drm_file *file,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ struct drm_i915_gem_object *obj;
+ struct intel_memory_region *mr;
+ enum intel_memory_type mem_type;
+ int cpp = DIV_ROUND_UP(args->bpp, 8);
+ u32 format;
+
+ switch (cpp) {
+ case 1:
+ format = DRM_FORMAT_C8;
+ break;
+ case 2:
+ format = DRM_FORMAT_RGB565;
+ break;
+ case 4:
+ format = DRM_FORMAT_XRGB8888;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /* have to work out size/pitch and return them */
+ args->pitch = ALIGN(args->width * cpp, 64);
+
+ /* align stride to page size so that we can remap */
+ if (args->pitch > intel_plane_fb_max_stride(to_i915(dev), format,
+ DRM_FORMAT_MOD_LINEAR))
+ args->pitch = ALIGN(args->pitch, 4096);
+
+ if (args->pitch < args->width)
+ return -EINVAL;
+
+ args->size = mul_u32_u32(args->pitch, args->height);
+
+ mem_type = INTEL_MEMORY_SYSTEM;
+ if (HAS_LMEM(to_i915(dev)))
+ mem_type = INTEL_MEMORY_LOCAL;
+
+ mr = intel_memory_region_by_type(to_i915(dev), mem_type);
+
+ obj = __i915_gem_object_create_user(to_i915(dev), args->size, &mr, 1);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ return i915_gem_publish(obj, file, &args->size, &args->handle);
+}
+
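+/*
+ * Example of the computation above: a 1920x1080 dumb buffer at 32 bpp gives
+ * cpp = 4, pitch = ALIGN(1920 * 4, 64) = 7680 bytes and
+ * size = 7680 * 1080 = 8294400 bytes (assuming the pitch stays below the
+ * plane's maximum linear stride, so the 4K realignment is not taken).
+ */
+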
+/**
+ * i915_gem_create_ioctl - Creates a new mm object and returns a handle to it.
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
+ */
+int
+i915_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_create *args = data;
+ struct drm_i915_gem_object *obj;
+ struct intel_memory_region *mr;
+
+ mr = intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM);
+
+ obj = __i915_gem_object_create_user(i915, args->size, &mr, 1);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ return i915_gem_publish(obj, file, &args->size, &args->handle);
+}
+
+struct create_ext {
+ struct drm_i915_private *i915;
+ struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
+ unsigned int n_placements;
+ unsigned int placement_mask;
+ unsigned long flags;
+ unsigned int pat_index;
+};
+
+static void repr_placements(char *buf, size_t size,
+ struct intel_memory_region **placements,
+ int n_placements)
+{
+ int i;
+
+ buf[0] = '\0';
+
+ for (i = 0; i < n_placements; i++) {
+ struct intel_memory_region *mr = placements[i];
+ int r;
+
+ r = snprintf(buf, size, "\n %s -> { class: %d, inst: %d }",
+ mr->name, mr->type, mr->instance);
+ if (r >= size)
+ return;
+
+ buf += r;
+ size -= r;
+ }
+}
+
+static int set_placements(struct drm_i915_gem_create_ext_memory_regions *args,
+ struct create_ext *ext_data)
+{
+ struct drm_i915_private *i915 = ext_data->i915;
+ struct drm_i915_gem_memory_class_instance __user *uregions =
+ u64_to_user_ptr(args->regions);
+ struct intel_memory_region *placements[INTEL_REGION_UNKNOWN];
+ u32 mask;
+ int i, ret = 0;
+
+ if (args->pad) {
+ drm_dbg(&i915->drm, "pad should be zero\n");
+ ret = -EINVAL;
+ }
+
+ if (!args->num_regions) {
+ drm_dbg(&i915->drm, "num_regions is zero\n");
+ ret = -EINVAL;
+ }
+
+ BUILD_BUG_ON(ARRAY_SIZE(i915->mm.regions) != ARRAY_SIZE(placements));
+ BUILD_BUG_ON(ARRAY_SIZE(ext_data->placements) != ARRAY_SIZE(placements));
+ if (args->num_regions > ARRAY_SIZE(i915->mm.regions)) {
+ drm_dbg(&i915->drm, "num_regions is too large\n");
+ ret = -EINVAL;
+ }
+
+ if (ret)
+ return ret;
+
+ mask = 0;
+ for (i = 0; i < args->num_regions; i++) {
+ struct drm_i915_gem_memory_class_instance region;
+ struct intel_memory_region *mr;
+
+ if (copy_from_user(&region, uregions, sizeof(region)))
+ return -EFAULT;
+
+ mr = intel_memory_region_lookup(i915,
+ region.memory_class,
+ region.memory_instance);
+ if (!mr || mr->private) {
+ drm_dbg(&i915->drm, "Device is missing region { class: %d, inst: %d } at index = %d\n",
+ region.memory_class, region.memory_instance, i);
+ ret = -EINVAL;
+ goto out_dump;
+ }
+
+ if (mask & BIT(mr->id)) {
+ drm_dbg(&i915->drm, "Found duplicate placement %s -> { class: %d, inst: %d } at index = %d\n",
+ mr->name, region.memory_class,
+ region.memory_instance, i);
+ ret = -EINVAL;
+ goto out_dump;
+ }
+
+ placements[i] = mr;
+ mask |= BIT(mr->id);
+
+ ++uregions;
+ }
+
+ if (ext_data->n_placements) {
+ ret = -EINVAL;
+ goto out_dump;
+ }
+
+ ext_data->n_placements = args->num_regions;
+ for (i = 0; i < args->num_regions; i++)
+ ext_data->placements[i] = placements[i];
+
+ ext_data->placement_mask = mask;
+ return 0;
+
+out_dump:
+ if (1) {
+ char buf[256];
+
+ if (ext_data->n_placements) {
+ repr_placements(buf,
+ sizeof(buf),
+ ext_data->placements,
+ ext_data->n_placements);
+ drm_dbg(&i915->drm,
+ "Placements were already set in previous EXT. Existing placements: %s\n",
+ buf);
+ }
+
+ repr_placements(buf, sizeof(buf), placements, i);
+ drm_dbg(&i915->drm, "New placements(so far validated): %s\n", buf);
+ }
+
+ return ret;
+}
+
+static int ext_set_placements(struct i915_user_extension __user *base,
+ void *data)
+{
+ struct drm_i915_gem_create_ext_memory_regions ext;
+
+ if (copy_from_user(&ext, base, sizeof(ext)))
+ return -EFAULT;
+
+ return set_placements(&ext, data);
+}
+
+static int ext_set_protected(struct i915_user_extension __user *base, void *data)
+{
+ struct drm_i915_gem_create_ext_protected_content ext;
+ struct create_ext *ext_data = data;
+
+ if (copy_from_user(&ext, base, sizeof(ext)))
+ return -EFAULT;
+
+ if (ext.flags)
+ return -EINVAL;
+
+ if (!intel_pxp_is_enabled(ext_data->i915->pxp))
+ return -ENODEV;
+
+ ext_data->flags |= I915_BO_PROTECTED;
+
+ return 0;
+}
+
+static int ext_set_pat(struct i915_user_extension __user *base, void *data)
+{
+ struct create_ext *ext_data = data;
+ struct drm_i915_private *i915 = ext_data->i915;
+ struct drm_i915_gem_create_ext_set_pat ext;
+ unsigned int max_pat_index;
+
+ BUILD_BUG_ON(sizeof(struct drm_i915_gem_create_ext_set_pat) !=
+ offsetofend(struct drm_i915_gem_create_ext_set_pat, rsvd));
+
+ /* Limiting the extension only to Meteor Lake */
+ if (!IS_METEORLAKE(i915))
+ return -ENODEV;
+
+ if (copy_from_user(&ext, base, sizeof(ext)))
+ return -EFAULT;
+
+ max_pat_index = INTEL_INFO(i915)->max_pat_index;
+
+ if (ext.pat_index > max_pat_index) {
+ drm_dbg(&i915->drm, "PAT index is invalid: %u\n",
+ ext.pat_index);
+ return -EINVAL;
+ }
+
+ ext_data->pat_index = ext.pat_index;
+
+ return 0;
+}
+
+static const i915_user_extension_fn create_extensions[] = {
+ [I915_GEM_CREATE_EXT_MEMORY_REGIONS] = ext_set_placements,
+ [I915_GEM_CREATE_EXT_PROTECTED_CONTENT] = ext_set_protected,
+ [I915_GEM_CREATE_EXT_SET_PAT] = ext_set_pat,
+};
+
+#define PAT_INDEX_NOT_SET 0xffff
+/**
+ * i915_gem_create_ext_ioctl - Creates a new mm object and returns a handle to it.
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
+ */
+int
+i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_create_ext *args = data;
+ struct create_ext ext_data = { .i915 = i915 };
+ struct drm_i915_gem_object *obj;
+ int ret;
+
+ if (args->flags & ~I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS)
+ return -EINVAL;
+
+ ext_data.pat_index = PAT_INDEX_NOT_SET;
+ ret = i915_user_extensions(u64_to_user_ptr(args->extensions),
+ create_extensions,
+ ARRAY_SIZE(create_extensions),
+ &ext_data);
+ if (ret)
+ return ret;
+
+ if (!ext_data.n_placements) {
+ ext_data.placements[0] =
+ intel_memory_region_by_type(i915, INTEL_MEMORY_SYSTEM);
+ ext_data.n_placements = 1;
+ }
+
+ if (args->flags & I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS) {
+ if (ext_data.n_placements == 1)
+ return -EINVAL;
+
+ /*
+ * We always need to be able to spill to system memory if we
+ * can't place the object in the mappable part of LMEM.
+ */
+ if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))
+ return -EINVAL;
+ } else {
+ if (ext_data.n_placements > 1 ||
+ ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)
+ ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;
+ }
+
+ obj = __i915_gem_object_create_user_ext(i915, args->size,
+ ext_data.placements,
+ ext_data.n_placements,
+ ext_data.flags);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (ext_data.pat_index != PAT_INDEX_NOT_SET) {
+ i915_gem_object_set_pat_index(obj, ext_data.pat_index);
+ /* Mark pat_index as set by the UMD */
+ obj->pat_set_by_user = true;
+ }
+
+ return i915_gem_publish(obj, file, &args->size, &args->handle);
+}
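+
+/*
+ * Example: a minimal userspace sketch of DRM_IOCTL_I915_GEM_CREATE_EXT with
+ * the memory-regions extension handled by ext_set_placements() above,
+ * asking for device-local memory with a system-memory fallback.  The size
+ * and instance numbers are illustrative; error handling is elided.
+ *
+ *	struct drm_i915_gem_memory_class_instance regions[] = {
+ *		{ .memory_class = I915_MEMORY_CLASS_DEVICE, .memory_instance = 0 },
+ *		{ .memory_class = I915_MEMORY_CLASS_SYSTEM, .memory_instance = 0 },
+ *	};
+ *	struct drm_i915_gem_create_ext_memory_regions ext = {
+ *		.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
+ *		.num_regions = 2,
+ *		.regions = (uintptr_t)regions,
+ *	};
+ *	struct drm_i915_gem_create_ext create = {
+ *		.size = 2 * 1024 * 1024,
+ *		.extensions = (uintptr_t)&ext,
+ *	};
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create);
+ *	// create.handle and create.size are filled in on success
+ */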
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_create.h b/drivers/gpu/drm/i915/gem/i915_gem_create.h
new file mode 100644
index 0000000000..9536aa9060
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_create.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __I915_GEM_CREATE_H__
+#define __I915_GEM_CREATE_H__
+
+struct drm_file;
+struct drm_device;
+struct drm_mode_create_dumb;
+
+int i915_gem_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
+
+#endif /* __I915_GEM_CREATE_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
new file mode 100644
index 0000000000..1df74f7aa3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -0,0 +1,348 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright 2012 Red Hat Inc
+ */
+
+#include <linux/dma-buf.h>
+#include <linux/highmem.h>
+#include <linux/dma-resv.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+
+#include "gem/i915_gem_dmabuf.h"
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+
+MODULE_IMPORT_NS(DMA_BUF);
+
+I915_SELFTEST_DECLARE(static bool force_different_devices;)
+
+static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
+{
+ return to_intel_bo(buf->priv);
+}
+
+static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach,
+ enum dma_data_direction dir)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
+ struct sg_table *sgt;
+ struct scatterlist *src, *dst;
+ int ret, i;
+
+ /*
+ * Make a copy of the object's sgt, so that we can make an independent
+ * mapping
+ */
+ sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+ if (!sgt) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = sg_alloc_table(sgt, obj->mm.pages->orig_nents, GFP_KERNEL);
+ if (ret)
+ goto err_free;
+
+ dst = sgt->sgl;
+ for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->orig_nents, i) {
+ sg_set_page(dst, sg_page(src), src->length, 0);
+ dst = sg_next(dst);
+ }
+
+ ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC);
+ if (ret)
+ goto err_free_sg;
+
+ return sgt;
+
+err_free_sg:
+ sg_free_table(sgt);
+err_free:
+ kfree(sgt);
+err:
+ return ERR_PTR(ret);
+}
+
+static int i915_gem_dmabuf_vmap(struct dma_buf *dma_buf,
+ struct iosys_map *map)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+ void *vaddr;
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ iosys_map_set_vaddr(map, vaddr);
+
+ return 0;
+}
+
+static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf,
+ struct iosys_map *map)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+}
+
+static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ int ret;
+
+ if (obj->base.size < vma->vm_end - vma->vm_start)
+ return -EINVAL;
+
+ if (HAS_LMEM(i915))
+ return drm_gem_prime_mmap(&obj->base, vma);
+
+ if (!obj->base.filp)
+ return -ENODEV;
+
+ ret = call_mmap(obj->base.filp, vma);
+ if (ret)
+ return ret;
+
+ vma_set_file(vma, obj->base.filp);
+
+ return 0;
+}
+
+static int i915_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+ bool write = (direction == DMA_BIDIRECTIONAL || direction == DMA_TO_DEVICE);
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_gem_object_pin_pages(obj);
+ if (!err) {
+ err = i915_gem_object_set_to_cpu_domain(obj, write);
+ i915_gem_object_unpin_pages(obj);
+ }
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ return err;
+}
+
+static int i915_gem_end_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dma_buf);
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_gem_object_pin_pages(obj);
+ if (!err) {
+ err = i915_gem_object_set_to_gtt_domain(obj, false);
+ i915_gem_object_unpin_pages(obj);
+ }
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ return err;
+}
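These two hooks are what userspace reaches through the generic DMA_BUF_IOCTL_SYNC ioctl when it wants coherent CPU access to an exported buffer. An illustrative sketch of that bracketing, assuming the standard linux/dma-buf.h uapi and an already mmap'ed dma-buf fd:

    /* Illustrative userspace sketch; assumes the dma-buf uapi. */
    #include <errno.h>
    #include <sys/ioctl.h>
    #include <linux/dma-buf.h>

    static int cpu_write_window(int dmabuf_fd)
    {
            struct dma_buf_sync sync = {
                    .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE,
            };

            /* Ends up in i915_gem_begin_cpu_access() above. */
            if (ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync))
                    return -errno;

            /* ... CPU writes through the mmap of dmabuf_fd go here ... */

            /* Ends up in i915_gem_end_cpu_access() above. */
            sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
            return ioctl(dmabuf_fd, DMA_BUF_IOCTL_SYNC, &sync) ? -errno : 0;
    }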
+
+static int i915_gem_dmabuf_attach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ if (!i915_gem_object_can_migrate(obj, INTEL_REGION_SMEM))
+ return -EOPNOTSUPP;
+
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
+ if (err)
+ continue;
+
+ err = i915_gem_object_wait_migration(obj, 0);
+ if (err)
+ continue;
+
+ err = i915_gem_object_pin_pages(obj);
+ }
+
+ return err;
+}
+
+static void i915_gem_dmabuf_detach(struct dma_buf *dmabuf,
+ struct dma_buf_attachment *attach)
+{
+ struct drm_i915_gem_object *obj = dma_buf_to_obj(dmabuf);
+
+ i915_gem_object_unpin_pages(obj);
+}
+
+static const struct dma_buf_ops i915_dmabuf_ops = {
+ .attach = i915_gem_dmabuf_attach,
+ .detach = i915_gem_dmabuf_detach,
+ .map_dma_buf = i915_gem_map_dma_buf,
+ .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .release = drm_gem_dmabuf_release,
+ .mmap = i915_gem_dmabuf_mmap,
+ .vmap = i915_gem_dmabuf_vmap,
+ .vunmap = i915_gem_dmabuf_vunmap,
+ .begin_cpu_access = i915_gem_begin_cpu_access,
+ .end_cpu_access = i915_gem_end_cpu_access,
+};
+
+struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
+{
+ struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+
+ exp_info.ops = &i915_dmabuf_ops;
+ exp_info.size = gem_obj->size;
+ exp_info.flags = flags;
+ exp_info.priv = gem_obj;
+ exp_info.resv = obj->base.resv;
+
+ if (obj->ops->dmabuf_export) {
+ int ret = obj->ops->dmabuf_export(obj);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ return drm_gem_dmabuf_export(gem_obj->dev, &exp_info);
+}
+
+static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct sg_table *sgt;
+
+ assert_object_held(obj);
+
+ sgt = dma_buf_map_attachment(obj->base.import_attach,
+ DMA_BIDIRECTIONAL);
+ if (IS_ERR(sgt))
+ return PTR_ERR(sgt);
+
+ /*
+ * DG1 is special here since it still snoops transactions even with
+ * CACHE_NONE. This is not the case with other HAS_SNOOP platforms. We
+ * might need to revisit this as we add new discrete platforms.
+ *
+ * XXX: Consider doing a vmap flush or something, where possible.
+ * Currently we just do a heavy handed wbinvd_on_all_cpus() here since
+ * the underlying sg_table might not even point to struct pages, so we
+ * can't just call drm_clflush_sg or similar, like we do elsewhere in
+ * the driver.
+ */
+ if (i915_gem_object_can_bypass_llc(obj) ||
+ (!HAS_LLC(i915) && !IS_DG1(i915)))
+ wbinvd_on_all_cpus();
+
+ __i915_gem_object_set_pages(obj, sgt);
+
+ return 0;
+}
+
+static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
+ struct sg_table *sgt)
+{
+ dma_buf_unmap_attachment(obj->base.import_attach, sgt,
+ DMA_BIDIRECTIONAL);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_dmabuf_ops = {
+ .name = "i915_gem_object_dmabuf",
+ .get_pages = i915_gem_object_get_pages_dmabuf,
+ .put_pages = i915_gem_object_put_pages_dmabuf,
+};
+
+struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf)
+{
+ static struct lock_class_key lock_class;
+ struct dma_buf_attachment *attach;
+ struct drm_i915_gem_object *obj;
+ int ret;
+
+ /* is this one of our own objects? */
+ if (dma_buf->ops == &i915_dmabuf_ops) {
+ obj = dma_buf_to_obj(dma_buf);
+ /* is it from our device? */
+ if (obj->base.dev == dev &&
+ !I915_SELFTEST_ONLY(force_different_devices)) {
+ /*
+ * Importing a dmabuf exported from our own gem increases
+ * refcount on gem itself instead of f_count of dmabuf.
+ */
+ return &i915_gem_object_get(obj)->base;
+ }
+ }
+
+ if (i915_gem_object_size_2big(dma_buf->size))
+ return ERR_PTR(-E2BIG);
+
+ /* need to attach */
+ attach = dma_buf_attach(dma_buf, dev->dev);
+ if (IS_ERR(attach))
+ return ERR_CAST(attach);
+
+ get_dma_buf(dma_buf);
+
+ obj = i915_gem_object_alloc();
+ if (!obj) {
+ ret = -ENOMEM;
+ goto fail_detach;
+ }
+
+ drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
+ i915_gem_object_init(obj, &i915_gem_object_dmabuf_ops, &lock_class,
+ I915_BO_ALLOC_USER);
+ obj->base.import_attach = attach;
+ obj->base.resv = dma_buf->resv;
+
+ /* We use GTT as shorthand for a coherent domain, one that is
+ * neither in the GPU cache nor in the CPU cache, where all
+ * writes are immediately visible in memory. (That's not strictly
+ * true, but it's close! There are internal buffers such as the
+ * write-combined buffer or a delay through the chipset for GTT
+ * writes that do require us to treat GTT as a separate cache domain.)
+ */
+ obj->read_domains = I915_GEM_DOMAIN_GTT;
+ obj->write_domain = 0;
+
+ return &obj->base;
+
+fail_detach:
+ dma_buf_detach(dma_buf, attach);
+ dma_buf_put(dma_buf);
+
+ return ERR_PTR(ret);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/mock_dmabuf.c"
+#include "selftests/i915_gem_dmabuf.c"
+#endif
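As a rough picture of how userspace exercises these export/import hooks, a sketch using libdrm's PRIME wrappers follows (illustrative only; the two file descriptors are assumed to be open DRM devices):

    /* Illustrative userspace sketch; assumes libdrm. */
    #include <stdint.h>
    #include <unistd.h>
    #include <xf86drm.h>

    static int share_bo(int exporter_fd, int importer_fd, uint32_t handle,
                        uint32_t *imported_handle)
    {
            int dmabuf_fd, ret;

            /* Lands in i915_gem_prime_export() on the exporting device. */
            ret = drmPrimeHandleToFD(exporter_fd, handle, DRM_CLOEXEC, &dmabuf_fd);
            if (ret)
                    return ret;

            /* Lands in i915_gem_prime_import() on the importing device. */
            ret = drmPrimeFDToHandle(importer_fd, dmabuf_fd, imported_handle);
            close(dmabuf_fd);
            return ret;
    }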
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h
new file mode 100644
index 0000000000..6e0405d47c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_GEM_DMABUF_H__
+#define __I915_GEM_DMABUF_H__
+
+struct drm_gem_object;
+struct drm_device;
+struct dma_buf;
+
+struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
+ struct dma_buf *dma_buf);
+
+struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags);
+
+#endif /* __I915_GEM_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
new file mode 100644
index 0000000000..ffddec1d2a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -0,0 +1,781 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include "display/intel_display.h"
+#include "display/intel_frontbuffer.h"
+#include "gt/intel_gt.h"
+
+#include "i915_drv.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_domain.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_lmem.h"
+#include "i915_gem_mman.h"
+#include "i915_gem_object.h"
+#include "i915_vma.h"
+
+#define VTD_GUARD (168u * I915_GTT_PAGE_SIZE) /* 168 or tile-row PTE padding */
+
+static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ if (IS_DGFX(i915))
+ return false;
+
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by set_pat extension, i915_gem_object_has_cache_level() will
+ * always return true, because the coherency of such object is managed
+ * by userspace. Otherwise the call here would fall back to checking
+ * whether the object is un-cached or write-through.
+ */
+ return !(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
+ i915_gem_object_has_cache_level(obj, I915_CACHE_WT));
+}
+
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ if (obj->cache_dirty)
+ return false;
+
+ if (IS_DGFX(i915))
+ return false;
+
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+ return true;
+
+ /* Currently in use by HW (display engine)? Keep flushed. */
+ return i915_gem_object_is_framebuffer(obj);
+}
+
+static void
+flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains)
+{
+ struct i915_vma *vma;
+
+ assert_object_held(obj);
+
+ if (!(obj->write_domain & flush_domains))
+ return;
+
+ switch (obj->write_domain) {
+ case I915_GEM_DOMAIN_GTT:
+ spin_lock(&obj->vma.lock);
+ for_each_ggtt_vma(vma, obj)
+ i915_vma_flush_writes(vma);
+ spin_unlock(&obj->vma.lock);
+
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
+ break;
+
+ case I915_GEM_DOMAIN_WC:
+ wmb();
+ break;
+
+ case I915_GEM_DOMAIN_CPU:
+ i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+ break;
+
+ case I915_GEM_DOMAIN_RENDER:
+ if (gpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+ break;
+ }
+
+ obj->write_domain = 0;
+}
+
+static void __i915_gem_object_flush_for_display(struct drm_i915_gem_object *obj)
+{
+ /*
+ * We manually flush the CPU domain so that we can override and
+ * force the flush for the display, and perform it asynchronously.
+ */
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+ if (obj->cache_dirty)
+ i915_gem_clflush_object(obj, I915_CLFLUSH_FORCE);
+ obj->write_domain = 0;
+}
+
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj)
+{
+ if (!i915_gem_object_is_framebuffer(obj))
+ return;
+
+ i915_gem_object_lock(obj, NULL);
+ __i915_gem_object_flush_for_display(obj);
+ i915_gem_object_unlock(obj);
+}
+
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj)
+{
+ if (i915_gem_object_is_framebuffer(obj))
+ __i915_gem_object_flush_for_display(obj);
+}
+
+/**
+ * i915_gem_object_set_to_wc_domain - Moves a single object to the WC read, and
+ * possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write)
+{
+ int ret;
+
+ assert_object_held(obj);
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ if (obj->write_domain == I915_GEM_DOMAIN_WC)
+ return 0;
+
+ /* Flush and acquire obj->pages so that we are coherent through
+ * direct access in memory with previous cached writes through
+ * shmemfs and that our cache domain tracking remains valid.
+ * For example, if the obj->filp was moved to swap without us
+ * being notified and releasing the pages, we would mistakenly
+ * continue to assume that the obj remained out of the CPU cached
+ * domain.
+ */
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_WC);
+
+ /* Serialise direct access to this object with the barriers for
+ * coherent writes from the GPU, by effectively invalidating the
+ * WC domain upon first access.
+ */
+ if ((obj->read_domains & I915_GEM_DOMAIN_WC) == 0)
+ mb();
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_WC) != 0);
+ obj->read_domains |= I915_GEM_DOMAIN_WC;
+ if (write) {
+ obj->read_domains = I915_GEM_DOMAIN_WC;
+ obj->write_domain = I915_GEM_DOMAIN_WC;
+ obj->mm.dirty = true;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+ return 0;
+}
+
+/**
+ * i915_gem_object_set_to_gtt_domain - Moves a single object to the GTT read,
+ * and possibly write domain.
+ * @obj: object to act on
+ * @write: ask for write access or read only
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
+{
+ int ret;
+
+ assert_object_held(obj);
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ if (obj->write_domain == I915_GEM_DOMAIN_GTT)
+ return 0;
+
+ /* Flush and acquire obj->pages so that we are coherent through
+ * direct access in memory with previous cached writes through
+ * shmemfs and that our cache domain tracking remains valid.
+ * For example, if the obj->filp was moved to swap without us
+ * being notified and releasing the pages, we would mistakenly
+ * continue to assume that the obj remained out of the CPU cached
+ * domain.
+ */
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_GTT);
+
+ /* Serialise direct access to this object with the barriers for
+ * coherent writes from the GPU, by effectively invalidating the
+ * GTT domain upon first access.
+ */
+ if ((obj->read_domains & I915_GEM_DOMAIN_GTT) == 0)
+ mb();
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ GEM_BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
+ obj->read_domains |= I915_GEM_DOMAIN_GTT;
+ if (write) {
+ struct i915_vma *vma;
+
+ obj->read_domains = I915_GEM_DOMAIN_GTT;
+ obj->write_domain = I915_GEM_DOMAIN_GTT;
+ obj->mm.dirty = true;
+
+ spin_lock(&obj->vma.lock);
+ for_each_ggtt_vma(vma, obj)
+ if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
+ i915_vma_set_ggtt_write(vma);
+ spin_unlock(&obj->vma.lock);
+ }
+
+ i915_gem_object_unpin_pages(obj);
+ return 0;
+}
+
+/**
+ * i915_gem_object_set_cache_level - Changes the cache-level of an object across all VMA.
+ * @obj: object to act on
+ * @cache_level: new cache level to set for the object
+ *
+ * After this function returns, the object will be in the new cache-level
+ * across all GTT and the contents of the backing storage will be coherent,
+ * with respect to the new cache-level. In order to keep the backing storage
+ * coherent for all users, we only allow a single cache level to be set
+ * globally on the object and prevent it from being changed whilst the
+ * hardware is reading from the object. That is, if the object is currently
+ * on the scanout it will be set to uncached (or equivalent display
+ * cache coherency) and all non-MOCS GPU access will also be uncached so
+ * that all direct access to the scanout remains coherent.
+ */
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+ enum i915_cache_level cache_level)
+{
+ int ret;
+
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by set_pat extension, simply return 0 here without touching
+ * the cache setting, because such objects should have an immutable
+ * cache setting by design and are always managed by userspace.
+ */
+ if (i915_gem_object_has_cache_level(obj, cache_level))
+ return 0;
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ /* Always invalidate stale cachelines */
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+ obj->cache_dirty = true;
+
+ /* The cache-level will be applied when each vma is rebound. */
+ return i915_gem_object_unbind(obj,
+ I915_GEM_OBJECT_UNBIND_ACTIVE |
+ I915_GEM_OBJECT_UNBIND_BARRIER);
+}
+
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_caching *args = data;
+ struct drm_i915_gem_object *obj;
+ int err = 0;
+
+ if (IS_DGFX(to_i915(dev)))
+ return -ENODEV;
+
+ rcu_read_lock();
+ obj = i915_gem_object_lookup_rcu(file, args->handle);
+ if (!obj) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ /*
+ * This ioctl should be disabled for the objects with pat_index
+ * set by user space.
+ */
+ if (obj->pat_set_by_user) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ if (i915_gem_object_has_cache_level(obj, I915_CACHE_LLC) ||
+ i915_gem_object_has_cache_level(obj, I915_CACHE_L3_LLC))
+ args->caching = I915_CACHING_CACHED;
+ else if (i915_gem_object_has_cache_level(obj, I915_CACHE_WT))
+ args->caching = I915_CACHING_DISPLAY;
+ else
+ args->caching = I915_CACHING_NONE;
+out:
+ rcu_read_unlock();
+ return err;
+}
+
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_caching *args = data;
+ struct drm_i915_gem_object *obj;
+ enum i915_cache_level level;
+ int ret = 0;
+
+ if (IS_DGFX(i915))
+ return -ENODEV;
+
+ if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70))
+ return -EOPNOTSUPP;
+
+ switch (args->caching) {
+ case I915_CACHING_NONE:
+ level = I915_CACHE_NONE;
+ break;
+ case I915_CACHING_CACHED:
+ /*
+ * Due to a HW issue on BXT A stepping, GPU stores via a
+ * snooped mapping may leave stale data in a corresponding CPU
+ * cacheline, whereas normally such cachelines would get
+ * invalidated.
+ */
+ if (!HAS_LLC(i915) && !HAS_SNOOP(i915))
+ return -ENODEV;
+
+ level = I915_CACHE_LLC;
+ break;
+ case I915_CACHING_DISPLAY:
+ level = HAS_WT(i915) ? I915_CACHE_WT : I915_CACHE_NONE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ /*
+ * This ioctl should be disabled for the objects with pat_index
+ * set by user space.
+ */
+ if (obj->pat_set_by_user) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ /*
+ * The caching mode of proxy object is handled by its generator, and
+ * not allowed to be changed by userspace.
+ */
+ if (i915_gem_object_is_proxy(obj)) {
+ /*
+ * Silently allow cached for userptr; the vulkan driver
+ * sets all objects to cached
+ */
+ if (!i915_gem_object_is_userptr(obj) ||
+ args->caching != I915_CACHING_CACHED)
+ ret = -ENXIO;
+
+ goto out;
+ }
+
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
+ if (ret)
+ goto out;
+
+ ret = i915_gem_object_set_cache_level(obj, level);
+ i915_gem_object_unlock(obj);
+
+out:
+ i915_gem_object_put(obj);
+ return ret;
+}
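For context, the matching userspace call is a two-field structure. An illustrative sketch, assuming the i915_drm.h uapi and libdrm (and noting that the kernel rejects this on discrete parts and on the newer platforms filtered out above):

    /* Illustrative userspace sketch; assumes libdrm and the i915 uapi headers. */
    #include <errno.h>
    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    static int bo_set_cached(int fd, uint32_t handle)
    {
            struct drm_i915_gem_caching arg = {
                    .handle = handle,
                    .caching = I915_CACHING_CACHED, /* request LLC/snooped caching */
            };

            return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) ? -errno : 0;
    }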
+
+/*
+ * Prepare buffer for display plane (scanout, cursors, etc). Can be called from
+ * an uninterruptible phase (modesetting) and allows any flushes to be pipelined
+ * (for pageflips). We only flush the caches while preparing the buffer for
+ * display, the callers are responsible for frontbuffer flush.
+ */
+struct i915_vma *
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ u32 alignment,
+ const struct i915_gtt_view *view,
+ unsigned int flags)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_vma *vma;
+ int ret;
+
+ /* Frame buffer must be in LMEM */
+ if (HAS_LMEM(i915) && !i915_gem_object_is_lmem(obj))
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * The display engine is not coherent with the LLC cache on gen6. As
+ * a result, we make sure that the pinning that is about to occur is
+ * done with uncached PTEs. This is lowest common denominator for all
+ * chipsets.
+ *
+ * However for gen6+, we could do better by using the GFDT bit instead
+ * of uncaching, which would allow us to flush all the LLC-cached data
+ * with that bit in the PTE to main memory with just one PIPE_CONTROL.
+ */
+ ret = i915_gem_object_set_cache_level(obj,
+ HAS_WT(i915) ?
+ I915_CACHE_WT : I915_CACHE_NONE);
+ if (ret)
+ return ERR_PTR(ret);
+
+ /* VT-d may overfetch before/after the vma, so pad with scratch */
+ if (intel_scanout_needs_vtd_wa(i915)) {
+ unsigned int guard = VTD_GUARD;
+
+ if (i915_gem_object_is_tiled(obj))
+ guard = max(guard,
+ i915_gem_object_get_tile_row_size(obj));
+
+ flags |= PIN_OFFSET_GUARD | guard;
+ }
+
+ /*
+ * As the user may map the buffer once pinned in the display plane
+ * (e.g. libkms for the bootup splash), we have to ensure that we
+ * always use map_and_fenceable for all scanout buffers. However,
+ * it may simply be too big to fit into mappable, in which case
+ * put it anyway and hope that userspace can cope (but always first
+ * try to preserve the existing ABI).
+ */
+ vma = ERR_PTR(-ENOSPC);
+ if ((flags & PIN_MAPPABLE) == 0 &&
+ (!view || view->type == I915_GTT_VIEW_NORMAL))
+ vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0, alignment,
+ flags | PIN_MAPPABLE |
+ PIN_NONBLOCK);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK))
+ vma = i915_gem_object_ggtt_pin_ww(obj, ww, view, 0,
+ alignment, flags);
+ if (IS_ERR(vma))
+ return vma;
+
+ vma->display_alignment = max(vma->display_alignment, alignment);
+ i915_vma_mark_scanout(vma);
+
+ i915_gem_object_flush_if_display_locked(obj);
+
+ return vma;
+}
+
+/**
+ * i915_gem_object_set_to_cpu_domain - Moves a single object to the CPU read,
+ * and possibly write domain.
+ * @obj: object to act on
+ * @write: requesting write or read-only access
+ *
+ * This function returns when the move is complete, including waiting on
+ * flushes to occur.
+ */
+int
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
+{
+ int ret;
+
+ assert_object_held(obj);
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+ /* Flush the CPU cache if it's still invalid. */
+ if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
+ i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
+ obj->read_domains |= I915_GEM_DOMAIN_CPU;
+ }
+
+ /* It should now be out of any other write domains, and we can update
+ * the domain values for our changes.
+ */
+ GEM_BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+
+ /* If we're writing through the CPU, then the GPU read domains will
+ * need to be invalidated at next use.
+ */
+ if (write)
+ __start_cpu_write(obj);
+
+ return 0;
+}
+
+/**
+ * i915_gem_set_domain_ioctl - Called when user space prepares to use an
+ * object with the CPU, either
+ * through the mmap ioctl's mapping or a GTT mapping.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ */
+int
+i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_set_domain *args = data;
+ struct drm_i915_gem_object *obj;
+ u32 read_domains = args->read_domains;
+ u32 write_domain = args->write_domain;
+ int err;
+
+ if (IS_DGFX(to_i915(dev)))
+ return -ENODEV;
+
+ /* Only handle setting domains to types used by the CPU. */
+ if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
+ return -EINVAL;
+
+ /*
+ * Having something in the write domain implies it's in the read
+ * domain, and only that read domain. Enforce that in the request.
+ */
+ if (write_domain && read_domains != write_domain)
+ return -EINVAL;
+
+ if (!read_domains)
+ return 0;
+
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ /*
+ * Try to flush the object off the GPU without holding the lock.
+ * We will repeat the flush holding the lock in the normal manner
+ * to catch cases where we are gazumped.
+ */
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_PRIORITY |
+ (write_domain ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ goto out;
+
+ if (i915_gem_object_is_userptr(obj)) {
+ /*
+ * Try to grab userptr pages, iris uses set_domain to check
+ * userptr validity
+ */
+ err = i915_gem_object_userptr_validate(obj);
+ if (!err)
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_PRIORITY |
+ (write_domain ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT);
+ goto out;
+ }
+
+ /*
+ * Proxy objects do not control access to the backing storage, ergo
+ * they cannot be used as a means to manipulate the cache domain
+ * tracking for that backing storage. The proxy object is always
+ * considered to be outside of any cache domain.
+ */
+ if (i915_gem_object_is_proxy(obj)) {
+ err = -ENXIO;
+ goto out;
+ }
+
+ err = i915_gem_object_lock_interruptible(obj, NULL);
+ if (err)
+ goto out;
+
+ /*
+ * Flush and acquire obj->pages so that we are coherent through
+ * direct access in memory with previous cached writes through
+ * shmemfs and that our cache domain tracking remains valid.
+ * For example, if the obj->filp was moved to swap without us
+ * being notified and releasing the pages, we would mistakenly
+ * continue to assume that the obj remained out of the CPU cached
+ * domain.
+ */
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out_unlock;
+
+ /*
+ * Already in the desired write domain? Nothing for us to do!
+ *
+ * We apply a little bit of cunning here to catch a broader set of
+ * no-ops. If obj->write_domain is set, we must be in the same
+ * obj->read_domains, and only that domain. Therefore, if that
+ * obj->write_domain matches the request read_domains, we are
+ * already in the same read/write domain and can skip the operation,
+ * without having to further check the requested write_domain.
+ */
+ if (READ_ONCE(obj->write_domain) == read_domains)
+ goto out_unpin;
+
+ if (read_domains & I915_GEM_DOMAIN_WC)
+ err = i915_gem_object_set_to_wc_domain(obj, write_domain);
+ else if (read_domains & I915_GEM_DOMAIN_GTT)
+ err = i915_gem_object_set_to_gtt_domain(obj, write_domain);
+ else
+ err = i915_gem_object_set_to_cpu_domain(obj, write_domain);
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+
+out_unlock:
+ i915_gem_object_unlock(obj);
+
+ if (!err && write_domain)
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
+
+out:
+ i915_gem_object_put(obj);
+ return err;
+}
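A minimal sketch of the userspace side, assuming the standard uapi structure and libdrm (typically issued before CPU access through an existing mmap of the object):

    /* Illustrative userspace sketch; assumes libdrm and the i915 uapi headers. */
    #include <errno.h>
    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    static int bo_prepare_cpu_write(int fd, uint32_t handle)
    {
            struct drm_i915_gem_set_domain arg = {
                    .handle = handle,
                    .read_domains = I915_GEM_DOMAIN_CPU,
                    .write_domain = I915_GEM_DOMAIN_CPU, /* 0 for read-only access */
            };

            return drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg) ? -errno : 0;
    }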
+
+/*
+ * Pins the specified object's pages and synchronizes the object with
+ * GPU accesses. Sets needs_clflush to non-zero if the caller should
+ * flush the object from the CPU cache.
+ */
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush)
+{
+ int ret;
+
+ *needs_clflush = 0;
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
+
+ assert_object_held(obj);
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, false);
+ if (ret)
+ goto err_unpin;
+ else
+ goto out;
+ }
+
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+ /* If we're not in the cpu read domain, set ourself into the gtt
+ * read domain and manually flush cachelines (if required). This
+ * optimizes for the case when the gpu will dirty the data
+ * anyway again before the next pread happens.
+ */
+ if (!obj->cache_dirty &&
+ !(obj->read_domains & I915_GEM_DOMAIN_CPU))
+ *needs_clflush = CLFLUSH_BEFORE;
+
+out:
+ /* return with the pages pinned */
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+ return ret;
+}
+
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush)
+{
+ int ret;
+
+ *needs_clflush = 0;
+ if (!i915_gem_object_has_struct_page(obj))
+ return -ENODEV;
+
+ assert_object_held(obj);
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret)
+ return ret;
+
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ return ret;
+
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE ||
+ !static_cpu_has(X86_FEATURE_CLFLUSH)) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret)
+ goto err_unpin;
+ else
+ goto out;
+ }
+
+ flush_write_domain(obj, ~I915_GEM_DOMAIN_CPU);
+
+ /* If we're not in the cpu write domain, set ourself into the
+ * gtt write domain and manually flush cachelines (as required).
+ * This optimizes for the case when the gpu will use the data
+ * right away and we therefore have to clflush anyway.
+ */
+ if (!obj->cache_dirty) {
+ *needs_clflush |= CLFLUSH_AFTER;
+
+ /*
+ * Same trick applies to invalidate partially written
+ * cachelines read before writing.
+ */
+ if (!(obj->read_domains & I915_GEM_DOMAIN_CPU))
+ *needs_clflush |= CLFLUSH_BEFORE;
+ }
+
+out:
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
+ obj->mm.dirty = true;
+ /* return with the pages pinned */
+ return 0;
+
+err_unpin:
+ i915_gem_object_unpin_pages(obj);
+ return ret;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.h b/drivers/gpu/drm/i915/gem/i915_gem_domain.h
new file mode 100644
index 0000000000..9622df962b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_GEM_DOMAIN_H__
+#define __I915_GEM_DOMAIN_H__
+
+struct drm_i915_gem_object;
+enum i915_cache_level;
+
+int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
+ enum i915_cache_level cache_level);
+
+#endif /* __I915_GEM_DOMAIN_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
new file mode 100644
index 0000000000..5a687a3686
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -0,0 +1,3652 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008,2010 Intel Corporation
+ */
+
+#include <linux/dma-resv.h>
+#include <linux/highmem.h>
+#include <linux/sync_file.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_syncobj.h>
+
+#include "display/intel_frontbuffer.h"
+
+#include "gem/i915_gem_ioctls.h"
+#include "gt/intel_context.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_buffer_pool.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
+
+#include "pxp/intel_pxp.h"
+
+#include "i915_cmd_parser.h"
+#include "i915_drv.h"
+#include "i915_file_private.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_context.h"
+#include "i915_gem_evict.h"
+#include "i915_gem_ioctls.h"
+#include "i915_reg.h"
+#include "i915_trace.h"
+#include "i915_user_extensions.h"
+
+struct eb_vma {
+ struct i915_vma *vma;
+ unsigned int flags;
+
+ /** This vma's place in the execbuf reservation list */
+ struct drm_i915_gem_exec_object2 *exec;
+ struct list_head bind_link;
+ struct list_head reloc_link;
+
+ struct hlist_node node;
+ u32 handle;
+};
+
+enum {
+ FORCE_CPU_RELOC = 1,
+ FORCE_GTT_RELOC,
+ FORCE_GPU_RELOC,
+#define DBG_FORCE_RELOC 0 /* choose one of the above! */
+};
+
+/* __EXEC_OBJECT_ flags > BIT(29) defined in i915_vma.h */
+#define __EXEC_OBJECT_HAS_PIN BIT(29)
+#define __EXEC_OBJECT_HAS_FENCE BIT(28)
+#define __EXEC_OBJECT_USERPTR_INIT BIT(27)
+#define __EXEC_OBJECT_NEEDS_MAP BIT(26)
+#define __EXEC_OBJECT_NEEDS_BIAS BIT(25)
+#define __EXEC_OBJECT_INTERNAL_FLAGS (~0u << 25) /* all of the above + */
+#define __EXEC_OBJECT_RESERVED (__EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_FENCE)
+
+#define __EXEC_HAS_RELOC BIT(31)
+#define __EXEC_ENGINE_PINNED BIT(30)
+#define __EXEC_USERPTR_USED BIT(29)
+#define __EXEC_INTERNAL_FLAGS (~0u << 29)
+#define UPDATE PIN_OFFSET_FIXED
+
+#define BATCH_OFFSET_BIAS (256*1024)
+
+#define __I915_EXEC_ILLEGAL_FLAGS \
+ (__I915_EXEC_UNKNOWN_FLAGS | \
+ I915_EXEC_CONSTANTS_MASK | \
+ I915_EXEC_RESOURCE_STREAMER)
+
+/* Catch emission of unexpected errors for CI! */
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+#undef EINVAL
+#define EINVAL ({ \
+ DRM_DEBUG_DRIVER("EINVAL at %s:%d\n", __func__, __LINE__); \
+ 22; \
+})
+#endif
+
+/**
+ * DOC: User command execution
+ *
+ * Userspace submits commands to be executed on the GPU as an instruction
+ * stream within a GEM object we call a batchbuffer. These instructions may
+ * refer to other GEM objects containing auxiliary state such as kernels,
+ * samplers, render targets and even secondary batchbuffers. Userspace does
+ * not know where in the GPU memory these objects reside and so before the
+ * batchbuffer is passed to the GPU for execution, those addresses in the
+ * batchbuffer and auxiliary objects are updated. This is known as relocation,
+ * or patching. To try and avoid having to relocate each object on the next
+ * execution, userspace is told the location of those objects in this pass,
+ * but this remains just a hint as the kernel may choose a new location for
+ * any object in the future.
+ *
+ * At the level of talking to the hardware, submitting a batchbuffer for the
+ * GPU to execute is to add content to a buffer from which the HW
+ * command streamer is reading.
+ *
+ * 1. Add a command to load the HW context. For Logical Ring Contexts, i.e.
+ * Execlists, this command is not placed on the same buffer as the
+ * remaining items.
+ *
+ * 2. Add a command to invalidate caches to the buffer.
+ *
+ * 3. Add a batchbuffer start command to the buffer; the start command is
+ * essentially a token together with the GPU address of the batchbuffer
+ * to be executed.
+ *
+ * 4. Add a pipeline flush to the buffer.
+ *
+ * 5. Add a memory write command to the buffer to record when the GPU
+ * is done executing the batchbuffer. The memory write writes the
+ * global sequence number of the request, ``i915_request::global_seqno``;
+ * the i915 driver uses the current value in the register to determine
+ * if the GPU has completed the batchbuffer.
+ *
+ * 6. Add a user interrupt command to the buffer. This command instructs
+ * the GPU to issue an interrupt when the command, pipeline flush and
+ * memory write are completed.
+ *
+ * 7. Inform the hardware of the additional commands added to the buffer
+ * (by updating the tail pointer).
+ *
+ * Processing an execbuf ioctl is conceptually split up into a few phases.
+ *
+ * 1. Validation - Ensure all the pointers, handles and flags are valid.
+ * 2. Reservation - Assign GPU address space for every object
+ * 3. Relocation - Update any addresses to point to the final locations
+ * 4. Serialisation - Order the request with respect to its dependencies
+ * 5. Construction - Construct a request to execute the batchbuffer
+ * 6. Submission (at some point in the future execution)
+ *
+ * Reserving resources for the execbuf is the most complicated phase. We
+ * neither want to have to migrate the object in the address space, nor do
+ * we want to have to update any relocations pointing to this object. Ideally,
+ * we want to leave the object where it is and for all the existing relocations
+ * to match. If the object is given a new address, or if userspace thinks the
+ * object is elsewhere, we have to parse all the relocation entries and update
+ * the addresses. Userspace can set the I915_EXEC_NO_RELOC flag to hint that
+ * all the target addresses in all of its objects match the value in the
+ * relocation entries and that they all match the presumed offsets given by the
+ * list of execbuffer objects. Using this knowledge, we know that if we haven't
+ * moved any buffers, all the relocation entries are valid and we can skip
+ * the update. (If userspace is wrong, the likely outcome is an impromptu GPU
+ * hang.) The requirements for using I915_EXEC_NO_RELOC are:
+ *
+ * The addresses written in the objects must match the corresponding
+ * reloc.presumed_offset which in turn must match the corresponding
+ * execobject.offset.
+ *
+ * Any render targets written to in the batch must be flagged with
+ * EXEC_OBJECT_WRITE.
+ *
+ * To avoid stalling, execobject.offset should match the current
+ * address of that object within the active context.
+ *
+ * The reservation is done in multiple phases. First we try to keep any
+ * object already bound in its current location - so long as it meets the
+ * constraints imposed by the new execbuffer. Any object left unbound after the
+ * first pass is then fitted into any available idle space. If an object does
+ * not fit, all objects are removed from the reservation and the process rerun
+ * after sorting the objects into a priority order (more difficult to fit
+ * objects are tried first). Failing that, the entire VM is cleared and we try
+ * to fit the execbuf once last time before concluding that it simply will not
+ * fit.
+ *
+ * A small complication to all of this is that we allow userspace not only to
+ * specify an alignment and a size for the object in the address space, but
+ * we also allow userspace to specify the exact offset. Such objects are
+ * simpler to place (the location is known a priori); all we have to do is make
+ * sure the space is available.
+ *
+ * Once all the objects are in place, patching up the buried pointers to point
+ * to the final locations is a fairly simple job of walking over the relocation
+ * entry arrays, looking up the right address and rewriting the value into
+ * the object. Simple! ... The relocation entries are stored in user memory
+ * and so to access them we have to copy them into a local buffer. That copy
+ * has to avoid taking any pagefaults as they may lead back to a GEM object
+ * requiring the struct_mutex (i.e. recursive deadlock). So once again we split
+ * the relocation into multiple passes. First we try to do everything within an
+ * atomic context (avoid the pagefaults) which requires that we never wait. If
+ * we detect that we may wait, or if we need to fault, then we have to fallback
+ * to a slower path. The slowpath has to drop the mutex. (Can you hear alarm
+ * bells yet?) Dropping the mutex means that we lose all the state we have
+ * built up so far for the execbuf and we must reset any global data. However,
+ * we do leave the objects pinned in their final locations - which is a
+ * potential issue for concurrent execbufs. Once we have left the mutex, we can
+ * allocate and copy all the relocation entries into a large array at our
+ * leisure, reacquire the mutex, reclaim all the objects and other state and
+ * then proceed to update any incorrect addresses with the objects.
+ *
+ * As we process the relocation entries, we maintain a record of whether the
+ * object is being written to. Using NORELOC, we expect userspace to provide
+ * this information instead. We also check whether we can skip the relocation
+ * by comparing the expected value inside the relocation entry with the target's
+ * final address. If they differ, we have to map the current object and rewrite
+ * the 4 or 8 byte pointer within.
+ *
+ * Serialising an execbuf is quite simple according to the rules of the GEM
+ * ABI. Execution within each context is ordered by the order of submission.
+ * Writes to any GEM object are in order of submission and are exclusive. Reads
+ * from a GEM object are unordered with respect to other reads, but ordered by
+ * writes. A write submitted after a read cannot occur before the read, and
+ * similarly any read submitted after a write cannot occur before the write.
+ * Writes are ordered between engines such that only one write occurs at any
+ * time (completing any reads beforehand) - using semaphores where available
+ * and CPU serialisation otherwise. Other GEM access obey the same rules, any
+ * write (either via mmaps using set-domain, or via pwrite) must flush all GPU
+ * reads before starting, and any read (either using set-domain or pread) must
+ * flush all GPU writes before starting. (Note we only employ a barrier before,
+ * we currently rely on userspace not concurrently starting a new execution
+ * whilst reading or writing to an object. This may be an advantage or not
+ * depending on how much you trust userspace not to shoot themselves in the
+ * foot.) Serialisation may just result in the request being inserted into
+ * a DAG awaiting its turn, but most simple is to wait on the CPU until
+ * all dependencies are resolved.
+ *
+ * After all of that, it is just a matter of closing the request and handing it to
+ * the hardware (well, leaving it in a queue to be executed). However, we also
+ * offer the ability for batchbuffers to be run with elevated privileges so
+ * that they access otherwise hidden registers. (Used to adjust L3 cache etc.)
+ * Before any batch is given extra privileges we first must check that it
+ * contains no nefarious instructions, we check that each instruction is from
+ * our whitelist and all registers are also from an allowed list. We first
+ * copy the user's batchbuffer to a shadow (so that the user doesn't have
+ * access to it, either by the CPU or GPU as we scan it) and then parse each
+ * instruction. If everything is ok, we set a flag telling the hardware to run
+ * the batchbuffer in trusted mode, otherwise the ioctl is rejected.
+ */
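To make the NO_RELOC contract above concrete, a stripped-down submission from userspace could look like the sketch below (illustrative only; the handles, context id and softpinned offsets are assumptions, and error handling beyond the ioctl return is omitted):

    /* Illustrative userspace sketch; assumes libdrm and the i915 uapi headers. */
    #include <errno.h>
    #include <stdint.h>
    #include <xf86drm.h>
    #include <drm/i915_drm.h>

    static int submit_batch(int fd, uint32_t ctx_id, uint32_t target_handle,
                            uint32_t batch_handle, uint64_t batch_len)
    {
            struct drm_i915_gem_exec_object2 objects[2] = {
                    {       /* render target written by the batch */
                            .handle = target_handle,
                            .offset = 0x100000, /* softpinned; must match the batch contents */
                            .flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
                                     EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
                    },
                    {       /* the batch itself; last, as BATCH_FIRST is not used */
                            .handle = batch_handle,
                            .offset = 0x200000,
                            .flags = EXEC_OBJECT_PINNED |
                                     EXEC_OBJECT_SUPPORTS_48B_ADDRESS,
                    },
            };
            struct drm_i915_gem_execbuffer2 execbuf = {
                    .buffers_ptr = (uintptr_t)objects,
                    .buffer_count = 2,
                    .batch_len = batch_len,
                    .flags = I915_EXEC_RENDER | I915_EXEC_NO_RELOC |
                             I915_EXEC_HANDLE_LUT,
                    .rsvd1 = ctx_id, /* context to execute in */
            };

            return drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf) ? -errno : 0;
    }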
+
+struct eb_fence {
+ struct drm_syncobj *syncobj; /* Use with ptr_mask_bits() */
+ struct dma_fence *dma_fence;
+ u64 value;
+ struct dma_fence_chain *chain_fence;
+};
+
+struct i915_execbuffer {
+ struct drm_i915_private *i915; /** i915 backpointer */
+ struct drm_file *file; /** per-file lookup tables and limits */
+ struct drm_i915_gem_execbuffer2 *args; /** ioctl parameters */
+ struct drm_i915_gem_exec_object2 *exec; /** ioctl execobj[] */
+ struct eb_vma *vma;
+
+ struct intel_gt *gt; /* gt for the execbuf */
+ struct intel_context *context; /* logical state for the request */
+ struct i915_gem_context *gem_context; /** caller's context */
+
+ /** our requests to build */
+ struct i915_request *requests[MAX_ENGINE_INSTANCE + 1];
+ /** identity of the batch obj/vma */
+ struct eb_vma *batches[MAX_ENGINE_INSTANCE + 1];
+ struct i915_vma *trampoline; /** trampoline used for chaining */
+
+ /** used for excl fence in dma_resv objects when > 1 BB submitted */
+ struct dma_fence *composite_fence;
+
+ /** actual size of execobj[] as we may extend it for the cmdparser */
+ unsigned int buffer_count;
+
+ /* number of batches in execbuf IOCTL */
+ unsigned int num_batches;
+
+ /** list of vma not yet bound during reservation phase */
+ struct list_head unbound;
+
+ /** list of vma that have execobj.relocation_count */
+ struct list_head relocs;
+
+ struct i915_gem_ww_ctx ww;
+
+ /**
+ * Track the most recently used object for relocations, as we
+ * frequently have to perform multiple relocations within the same
+ * obj/page
+ */
+ struct reloc_cache {
+ struct drm_mm_node node; /** temporary GTT binding */
+ unsigned long vaddr; /** Current kmap address */
+ unsigned long page; /** Currently mapped page index */
+ unsigned int graphics_ver; /** Cached value of GRAPHICS_VER */
+ bool use_64bit_reloc : 1;
+ bool has_llc : 1;
+ bool has_fence : 1;
+ bool needs_unfenced : 1;
+ } reloc_cache;
+
+ u64 invalid_flags; /** Set of execobj.flags that are invalid */
+
+ /** Length of batch within object */
+ u64 batch_len[MAX_ENGINE_INSTANCE + 1];
+ u32 batch_start_offset; /** Location within object of batch */
+ u32 batch_flags; /** Flags composed for emit_bb_start() */
+ struct intel_gt_buffer_pool_node *batch_pool; /** pool node for batch buffer */
+
+ /**
+ * Indicate either the size of the hashtable used to resolve
+ * relocation handles, or if negative that we are using a direct
+ * index into the execobj[].
+ */
+ int lut_size;
+ struct hlist_head *buckets; /** ht for relocation handles */
+
+ struct eb_fence *fences;
+ unsigned long num_fences;
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+ struct i915_capture_list *capture_lists[MAX_ENGINE_INSTANCE + 1];
+#endif
+};
+
+static int eb_parse(struct i915_execbuffer *eb);
+static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle);
+static void eb_unpin_engine(struct i915_execbuffer *eb);
+static void eb_capture_release(struct i915_execbuffer *eb);
+
+static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb)
+{
+ return intel_engine_requires_cmd_parser(eb->context->engine) ||
+ (intel_engine_using_cmd_parser(eb->context->engine) &&
+ eb->args->batch_len);
+}
+
+static int eb_create(struct i915_execbuffer *eb)
+{
+ if (!(eb->args->flags & I915_EXEC_HANDLE_LUT)) {
+ unsigned int size = 1 + ilog2(eb->buffer_count);
+
+ /*
+ * Without a 1:1 association between relocation handles and
+ * the execobject[] index, we instead create a hashtable.
+ * We size it dynamically based on available memory, starting
+ * first with a 1:1 associative hash and scaling back until
+ * the allocation succeeds.
+ *
+ * Later on we use a positive lut_size to indicate we are
+ * using this hashtable, and a negative value to indicate a
+ * direct lookup.
+ */
+ do {
+ gfp_t flags;
+
+ /* While we can still reduce the allocation size, don't
+ * raise a warning and allow the allocation to fail.
+ * On the last pass though, we want to try as hard
+ * as possible to perform the allocation and warn
+ * if it fails.
+ */
+ flags = GFP_KERNEL;
+ if (size > 1)
+ flags |= __GFP_NORETRY | __GFP_NOWARN;
+
+ eb->buckets = kzalloc(sizeof(struct hlist_head) << size,
+ flags);
+ if (eb->buckets)
+ break;
+ } while (--size);
+
+ if (unlikely(!size))
+ return -ENOMEM;
+
+ eb->lut_size = size;
+ } else {
+ eb->lut_size = -eb->buffer_count;
+ }
+
+ return 0;
+}
+
+static bool
+eb_vma_misplaced(const struct drm_i915_gem_exec_object2 *entry,
+ const struct i915_vma *vma,
+ unsigned int flags)
+{
+ const u64 start = i915_vma_offset(vma);
+ const u64 size = i915_vma_size(vma);
+
+ if (size < entry->pad_to_size)
+ return true;
+
+ if (entry->alignment && !IS_ALIGNED(start, entry->alignment))
+ return true;
+
+ if (flags & EXEC_OBJECT_PINNED &&
+ start != entry->offset)
+ return true;
+
+ if (flags & __EXEC_OBJECT_NEEDS_BIAS &&
+ start < BATCH_OFFSET_BIAS)
+ return true;
+
+ if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) &&
+ (start + size + 4095) >> 32)
+ return true;
+
+ if (flags & __EXEC_OBJECT_NEEDS_MAP &&
+ !i915_vma_is_map_and_fenceable(vma))
+ return true;
+
+ return false;
+}
+
+static u64 eb_pin_flags(const struct drm_i915_gem_exec_object2 *entry,
+ unsigned int exec_flags)
+{
+ u64 pin_flags = 0;
+
+ if (exec_flags & EXEC_OBJECT_NEEDS_GTT)
+ pin_flags |= PIN_GLOBAL;
+
+ /*
+ * Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+ * limit address to the first 4GBs for unflagged objects.
+ */
+ if (!(exec_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ pin_flags |= PIN_ZONE_4G;
+
+ if (exec_flags & __EXEC_OBJECT_NEEDS_MAP)
+ pin_flags |= PIN_MAPPABLE;
+
+ if (exec_flags & EXEC_OBJECT_PINNED)
+ pin_flags |= entry->offset | PIN_OFFSET_FIXED;
+ else if (exec_flags & __EXEC_OBJECT_NEEDS_BIAS)
+ pin_flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+
+ return pin_flags;
+}
+
+static inline int
+eb_pin_vma(struct i915_execbuffer *eb,
+ const struct drm_i915_gem_exec_object2 *entry,
+ struct eb_vma *ev)
+{
+ struct i915_vma *vma = ev->vma;
+ u64 pin_flags;
+ int err;
+
+ if (vma->node.size)
+ pin_flags = __i915_vma_offset(vma);
+ else
+ pin_flags = entry->offset & PIN_OFFSET_MASK;
+
+ pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
+ if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
+ pin_flags |= PIN_GLOBAL;
+
+ /* Attempt to reuse the current location if available */
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, pin_flags);
+ if (err == -EDEADLK)
+ return err;
+
+ if (unlikely(err)) {
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ return err;
+
+ /* Failing that pick any _free_ space if suitable */
+ err = i915_vma_pin_ww(vma, &eb->ww,
+ entry->pad_to_size,
+ entry->alignment,
+ eb_pin_flags(entry, ev->flags) |
+ PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
+ if (unlikely(err))
+ return err;
+ }
+
+ if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ err = i915_vma_pin_fence(vma);
+ if (unlikely(err))
+ return err;
+
+ if (vma->fence)
+ ev->flags |= __EXEC_OBJECT_HAS_FENCE;
+ }
+
+ ev->flags |= __EXEC_OBJECT_HAS_PIN;
+ if (eb_vma_misplaced(entry, vma, ev->flags))
+ return -EBADSLT;
+
+ return 0;
+}
+
+static inline void
+eb_unreserve_vma(struct eb_vma *ev)
+{
+ if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
+ __i915_vma_unpin_fence(ev->vma);
+
+ ev->flags &= ~__EXEC_OBJECT_RESERVED;
+}
+
+static int
+eb_validate_vma(struct i915_execbuffer *eb,
+ struct drm_i915_gem_exec_object2 *entry,
+ struct i915_vma *vma)
+{
+ /* Relocations are disallowed for all platforms after TGL-LP. This
+ * also covers all platforms with local memory.
+ */
+ if (entry->relocation_count &&
+ GRAPHICS_VER(eb->i915) >= 12 && !IS_TIGERLAKE(eb->i915))
+ return -EINVAL;
+
+ if (unlikely(entry->flags & eb->invalid_flags))
+ return -EINVAL;
+
+ if (unlikely(entry->alignment &&
+ !is_power_of_2_u64(entry->alignment)))
+ return -EINVAL;
+
+ /*
+ * Offset can be used as input (EXEC_OBJECT_PINNED), reject
+ * any non-page-aligned or non-canonical addresses.
+ */
+ if (unlikely(entry->flags & EXEC_OBJECT_PINNED &&
+ entry->offset != gen8_canonical_addr(entry->offset & I915_GTT_PAGE_MASK)))
+ return -EINVAL;
+
+ /* pad_to_size was once a reserved field, so sanitize it */
+ if (entry->flags & EXEC_OBJECT_PAD_TO_SIZE) {
+ if (unlikely(offset_in_page(entry->pad_to_size)))
+ return -EINVAL;
+ } else {
+ entry->pad_to_size = 0;
+ }
+ /*
+ * From the drm_mm perspective the address space is continuous,
+ * so from this point on we always use the non-canonical
+ * form internally.
+ */
+ entry->offset = gen8_noncanonical_addr(entry->offset);
+
+ if (!eb->reloc_cache.has_fence) {
+ entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
+ } else {
+ if ((entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+ eb->reloc_cache.needs_unfenced) &&
+ i915_gem_object_is_tiled(vma->obj))
+ entry->flags |= EXEC_OBJECT_NEEDS_GTT | __EXEC_OBJECT_NEEDS_MAP;
+ }
+
+ return 0;
+}
+
+static inline bool
+is_batch_buffer(struct i915_execbuffer *eb, unsigned int buffer_idx)
+{
+ return eb->args->flags & I915_EXEC_BATCH_FIRST ?
+ buffer_idx < eb->num_batches :
+ buffer_idx >= eb->args->buffer_count - eb->num_batches;
+}
+
+static int
+eb_add_vma(struct i915_execbuffer *eb,
+ unsigned int *current_batch,
+ unsigned int i,
+ struct i915_vma *vma)
+{
+ struct drm_i915_private *i915 = eb->i915;
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct eb_vma *ev = &eb->vma[i];
+
+ ev->vma = vma;
+ ev->exec = entry;
+ ev->flags = entry->flags;
+
+ if (eb->lut_size > 0) {
+ ev->handle = entry->handle;
+ hlist_add_head(&ev->node,
+ &eb->buckets[hash_32(entry->handle,
+ eb->lut_size)]);
+ }
+
+ if (entry->relocation_count)
+ list_add_tail(&ev->reloc_link, &eb->relocs);
+
+ /*
+ * SNA is doing fancy tricks with compressing batch buffers, which leads
+ * to negative relocation deltas. Usually that works out ok since the
+ * relocate address is still positive, except when the batch is placed
+ * very low in the GTT. Ensure this doesn't happen.
+ *
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
+ */
+ if (is_batch_buffer(eb, i)) {
+ if (entry->relocation_count &&
+ !(ev->flags & EXEC_OBJECT_PINNED))
+ ev->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ if (eb->reloc_cache.has_fence)
+ ev->flags |= EXEC_OBJECT_NEEDS_FENCE;
+
+ eb->batches[*current_batch] = ev;
+
+ if (unlikely(ev->flags & EXEC_OBJECT_WRITE)) {
+ drm_dbg(&i915->drm,
+ "Attempting to use self-modifying batch buffer\n");
+ return -EINVAL;
+ }
+
+ if (range_overflows_t(u64,
+ eb->batch_start_offset,
+ eb->args->batch_len,
+ ev->vma->size)) {
+ drm_dbg(&i915->drm, "Attempting to use out-of-bounds batch\n");
+ return -EINVAL;
+ }
+
+ if (eb->args->batch_len == 0)
+ eb->batch_len[*current_batch] = ev->vma->size -
+ eb->batch_start_offset;
+ else
+ eb->batch_len[*current_batch] = eb->args->batch_len;
+ if (unlikely(eb->batch_len[*current_batch] == 0)) { /* impossible! */
+ drm_dbg(&i915->drm, "Invalid batch length\n");
+ return -EINVAL;
+ }
+
+ ++*current_batch;
+ }
+
+ return 0;
+}
+
+static inline int use_cpu_reloc(const struct reloc_cache *cache,
+ const struct drm_i915_gem_object *obj)
+{
+ if (!i915_gem_object_has_struct_page(obj))
+ return false;
+
+ if (DBG_FORCE_RELOC == FORCE_CPU_RELOC)
+ return true;
+
+ if (DBG_FORCE_RELOC == FORCE_GTT_RELOC)
+ return false;
+
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by set_pat extension, i915_gem_object_has_cache_level() always
+ * return true, otherwise the call would fall back to checking whether
+ * the object is un-cached.
+ */
+ return (cache->has_llc ||
+ obj->cache_dirty ||
+ !i915_gem_object_has_cache_level(obj, I915_CACHE_NONE));
+}
+
+static int eb_reserve_vma(struct i915_execbuffer *eb,
+ struct eb_vma *ev,
+ u64 pin_flags)
+{
+ struct drm_i915_gem_exec_object2 *entry = ev->exec;
+ struct i915_vma *vma = ev->vma;
+ int err;
+
+ if (drm_mm_node_allocated(&vma->node) &&
+ eb_vma_misplaced(entry, vma, ev->flags)) {
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+
+ err = i915_vma_pin_ww(vma, &eb->ww,
+ entry->pad_to_size, entry->alignment,
+ eb_pin_flags(entry, ev->flags) | pin_flags);
+ if (err)
+ return err;
+
+ if (entry->offset != i915_vma_offset(vma)) {
+ entry->offset = i915_vma_offset(vma) | UPDATE;
+ eb->args->flags |= __EXEC_HAS_RELOC;
+ }
+
+ if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
+ err = i915_vma_pin_fence(vma);
+ if (unlikely(err))
+ return err;
+
+ if (vma->fence)
+ ev->flags |= __EXEC_OBJECT_HAS_FENCE;
+ }
+
+ ev->flags |= __EXEC_OBJECT_HAS_PIN;
+ GEM_BUG_ON(eb_vma_misplaced(entry, vma, ev->flags));
+
+ return 0;
+}
+
+static bool eb_unbind(struct i915_execbuffer *eb, bool force)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+ struct list_head last;
+ bool unpinned = false;
+
+ /* Resort *all* the objects into priority order */
+ INIT_LIST_HEAD(&eb->unbound);
+ INIT_LIST_HEAD(&last);
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ unsigned int flags = ev->flags;
+
+ if (!force && flags & EXEC_OBJECT_PINNED &&
+ flags & __EXEC_OBJECT_HAS_PIN)
+ continue;
+
+ unpinned = true;
+ eb_unreserve_vma(ev);
+
+ if (flags & EXEC_OBJECT_PINNED)
+ /* Pinned objects must have their slot */
+ list_add(&ev->bind_link, &eb->unbound);
+ else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+ /* Mappable objects require the lowest 256MiB (aperture) */
+ list_add_tail(&ev->bind_link, &eb->unbound);
+ else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ /* Prioritise 4GiB region for restricted bo */
+ list_add(&ev->bind_link, &last);
+ else
+ list_add_tail(&ev->bind_link, &last);
+ }
+
+ list_splice_tail(&last, &eb->unbound);
+ return unpinned;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
+ struct eb_vma *ev;
+ unsigned int pass;
+ int err = 0;
+
+ /*
+ * We have one or more buffers that we couldn't bind, which could be due to
+ * various reasons. To resolve this we have 4 passes, with every next
+ * level turning the screws tighter:
+ *
+ * 0. Unbind all objects that do not match the GTT constraints for the
+ * execbuffer (fenceable, mappable, alignment etc). Bind all new
+ * objects. This avoids unnecessary unbinding of later objects in order
+ * to make room for the earlier objects *unless* we need to defragment.
+ *
+ * 1. Reorder the buffers, where objects with the most restrictive
+ * placement requirements go first (ignoring fixed location buffers for
+ * now). For example, objects needing the mappable aperture (the first
+ * 256M of GTT), should go first vs objects that can be placed just
+ * about anywhere. Repeat the previous pass.
+ *
+ * 2. Consider buffers that are pinned at a fixed location. Also try to
+ * evict the entire VM this time, leaving only objects that we were
+ * unable to lock. Try again to bind the buffers (still using the new
+ * buffer order).
+ *
+ * 3. We likely have object lock contention for one or more stubborn
+ * objects in the VM, for which we need to evict to make forward
+ * progress (perhaps we are fighting the shrinker?). When evicting the
+ * VM this time around, anything that we can't lock we now track via
+ * busy_bo, taking the full lock (after dropping the vm->mutex to
+ * prevent deadlocks) instead of a trylock. We then continue to evict the
+ * VM, this time with the stubborn object locked, which we can now
+ * hopefully unbind (if still bound in the VM). Repeat until the VM is
+ * evicted. Finally we should be able to bind everything.
+ */
+ for (pass = 0; pass <= 3; pass++) {
+ int pin_flags = PIN_USER | PIN_VALIDATE;
+
+ if (pass == 0)
+ pin_flags |= PIN_NONBLOCK;
+
+ if (pass >= 1)
+ eb_unbind(eb, pass >= 2);
+
+ if (pass == 2) {
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, NULL);
+ mutex_unlock(&eb->context->vm->mutex);
+ }
+ if (err)
+ return err;
+ }
+
+ if (pass == 3) {
+retry:
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ struct drm_i915_gem_object *busy_bo = NULL;
+
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww, &busy_bo);
+ mutex_unlock(&eb->context->vm->mutex);
+ if (err && busy_bo) {
+ err = i915_gem_object_lock(busy_bo, &eb->ww);
+ i915_gem_object_put(busy_bo);
+ if (!err)
+ goto retry;
+ }
+ }
+ if (err)
+ return err;
+ }
+
+ list_for_each_entry(ev, &eb->unbound, bind_link) {
+ err = eb_reserve_vma(eb, ev, pin_flags);
+ if (err)
+ break;
+ }
+
+ if (err != -ENOSPC)
+ break;
+ }
+
+ return err;
+}
+
+static int eb_select_context(struct i915_execbuffer *eb)
+{
+ struct i915_gem_context *ctx;
+
+ ctx = i915_gem_context_lookup(eb->file->driver_priv, eb->args->rsvd1);
+ if (unlikely(IS_ERR(ctx)))
+ return PTR_ERR(ctx);
+
+ eb->gem_context = ctx;
+ if (i915_gem_context_has_full_ppgtt(ctx))
+ eb->invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
+
+ return 0;
+}
+
+static int __eb_add_lut(struct i915_execbuffer *eb,
+ u32 handle, struct i915_vma *vma)
+{
+ struct i915_gem_context *ctx = eb->gem_context;
+ struct i915_lut_handle *lut;
+ int err;
+
+ lut = i915_lut_handle_alloc();
+ if (unlikely(!lut))
+ return -ENOMEM;
+
+ i915_vma_get(vma);
+ if (!atomic_fetch_inc(&vma->open_count))
+ i915_vma_reopen(vma);
+ lut->handle = handle;
+ lut->ctx = ctx;
+
+ /* Check that the context hasn't been closed in the meantime */
+ err = -EINTR;
+ if (!mutex_lock_interruptible(&ctx->lut_mutex)) {
+ if (likely(!i915_gem_context_is_closed(ctx)))
+ err = radix_tree_insert(&ctx->handles_vma, handle, vma);
+ else
+ err = -ENOENT;
+ if (err == 0) { /* And nor has this handle */
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ spin_lock(&obj->lut_lock);
+ if (idr_find(&eb->file->object_idr, handle) == obj) {
+ list_add(&lut->obj_link, &obj->lut_list);
+ } else {
+ radix_tree_delete(&ctx->handles_vma, handle);
+ err = -ENOENT;
+ }
+ spin_unlock(&obj->lut_lock);
+ }
+ mutex_unlock(&ctx->lut_mutex);
+ }
+ if (unlikely(err))
+ goto err;
+
+ return 0;
+
+err:
+ i915_vma_close(vma);
+ i915_vma_put(vma);
+ i915_lut_handle_free(lut);
+ return err;
+}
+
+static struct i915_vma *eb_lookup_vma(struct i915_execbuffer *eb, u32 handle)
+{
+ struct i915_address_space *vm = eb->context->vm;
+
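+ /*
+ * Fast path: the handle may already be cached in the context's
+ * handles_vma radix tree. Otherwise fall back to a full GEM handle
+ * lookup, instantiate the vma for this vm and insert it into the
+ * LUT, retrying if another thread beat us to it (-EEXIST).
+ */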
+ do {
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int err;
+
+ rcu_read_lock();
+ vma = radix_tree_lookup(&eb->gem_context->handles_vma, handle);
+ if (likely(vma && vma->vm == vm))
+ vma = i915_vma_tryget(vma);
+ rcu_read_unlock();
+ if (likely(vma))
+ return vma;
+
+ obj = i915_gem_object_lookup(eb->file, handle);
+ if (unlikely(!obj))
+ return ERR_PTR(-ENOENT);
+
+ /*
+ * If the user has opted in to protected-object tracking, make
+ * sure the object encryption can be used.
+ * We only need to do this when the object is first used with
+ * this context, because the context itself will be banned when
+ * the protected objects become invalid.
+ */
+ if (i915_gem_context_uses_protected_content(eb->gem_context) &&
+ i915_gem_object_is_protected(obj)) {
+ err = intel_pxp_key_check(eb->i915->pxp, obj, true);
+ if (err) {
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+ }
+ }
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ i915_gem_object_put(obj);
+ return vma;
+ }
+
+ err = __eb_add_lut(eb, handle, vma);
+ if (likely(!err))
+ return vma;
+
+ i915_gem_object_put(obj);
+ if (err != -EEXIST)
+ return ERR_PTR(err);
+ } while (1);
+}
+
+static int eb_lookup_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int i, current_batch = 0;
+ int err = 0;
+
+ INIT_LIST_HEAD(&eb->relocs);
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct i915_vma *vma;
+
+ vma = eb_lookup_vma(eb, eb->exec[i].handle);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err;
+ }
+
+ err = eb_validate_vma(eb, &eb->exec[i], vma);
+ if (unlikely(err)) {
+ i915_vma_put(vma);
+ goto err;
+ }
+
+ err = eb_add_vma(eb, &current_batch, i, vma);
+ if (err)
+ return err;
+
+ if (i915_gem_object_is_userptr(vma->obj)) {
+ err = i915_gem_object_userptr_submit_init(vma->obj);
+ if (err) {
+ if (i + 1 < eb->buffer_count) {
+ /*
+ * The execbuffer code expects the last vma entry to be
+ * NULL. Since we have already initialised this entry,
+ * set the next one to NULL or we mess up the cleanup
+ * handling.
+ */
+ eb->vma[i + 1].vma = NULL;
+ }
+
+ return err;
+ }
+
+ eb->vma[i].flags |= __EXEC_OBJECT_USERPTR_INIT;
+ eb->args->flags |= __EXEC_USERPTR_USED;
+ }
+ }
+
+ return 0;
+
+err:
+ eb->vma[i].vma = NULL;
+ return err;
+}
+
+static int eb_lock_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+
+ err = i915_gem_object_lock(vma->obj, &eb->ww);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int eb_validate_vmas(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ INIT_LIST_HEAD(&eb->unbound);
+
+ err = eb_lock_vmas(eb);
+ if (err)
+ return err;
+
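+ /*
+ * Try to pin each execobject in place; anything that cannot be
+ * pinned as-is is unreserved and added to eb->unbound for
+ * eb_reserve() to place.
+ */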
+ for (i = 0; i < eb->buffer_count; i++) {
+ struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+
+ err = eb_pin_vma(eb, entry, ev);
+ if (err == -EDEADLK)
+ return err;
+
+ if (!err) {
+ if (entry->offset != i915_vma_offset(vma)) {
+ entry->offset = i915_vma_offset(vma) | UPDATE;
+ eb->args->flags |= __EXEC_HAS_RELOC;
+ }
+ } else {
+ eb_unreserve_vma(ev);
+
+ list_add_tail(&ev->bind_link, &eb->unbound);
+ if (drm_mm_node_allocated(&vma->node)) {
+ err = i915_vma_unbind(vma);
+ if (err)
+ return err;
+ }
+ }
+
+ /* Reserve enough slots to accommodate composite fences */
+ err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
+ if (err)
+ return err;
+
+ GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+ eb_vma_misplaced(&eb->exec[i], vma, ev->flags));
+ }
+
+ if (!list_empty(&eb->unbound))
+ return eb_reserve(eb);
+
+ return 0;
+}
+
+static struct eb_vma *
+eb_get_vma(const struct i915_execbuffer *eb, unsigned long handle)
+{
+ if (eb->lut_size < 0) {
+ if (handle >= -eb->lut_size)
+ return NULL;
+ return &eb->vma[handle];
+ } else {
+ struct hlist_head *head;
+ struct eb_vma *ev;
+
+ head = &eb->buckets[hash_32(handle, eb->lut_size)];
+ hlist_for_each_entry(ev, head, node) {
+ if (ev->handle == handle)
+ return ev;
+ }
+ return NULL;
+ }
+}
+
+static void eb_release_vmas(struct i915_execbuffer *eb, bool final)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+
+ if (!vma)
+ break;
+
+ eb_unreserve_vma(ev);
+
+ if (final)
+ i915_vma_put(vma);
+ }
+
+ eb_capture_release(eb);
+ eb_unpin_engine(eb);
+}
+
+static void eb_destroy(const struct i915_execbuffer *eb)
+{
+ if (eb->lut_size > 0)
+ kfree(eb->buckets);
+}
+
+static inline u64
+relocation_target(const struct drm_i915_gem_relocation_entry *reloc,
+ const struct i915_vma *target)
+{
+ return gen8_canonical_addr((int)reloc->delta + i915_vma_offset(target));
+}
+
+static void reloc_cache_init(struct reloc_cache *cache,
+ struct drm_i915_private *i915)
+{
+ cache->page = -1;
+ cache->vaddr = 0;
+ /* Must be a variable in the struct to allow GCC to unroll. */
+ cache->graphics_ver = GRAPHICS_VER(i915);
+ cache->has_llc = HAS_LLC(i915);
+ cache->use_64bit_reloc = HAS_64BIT_RELOC(i915);
+ cache->has_fence = cache->graphics_ver < 4;
+ cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
+ cache->node.flags = 0;
+}
+
+static inline void *unmask_page(unsigned long p)
+{
+ return (void *)(uintptr_t)(p & PAGE_MASK);
+}
+
+static inline unsigned int unmask_flags(unsigned long p)
+{
+ return p & ~PAGE_MASK;
+}
+
+#define KMAP 0x4 /* after CLFLUSH_FLAGS */
+
+static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
+{
+ struct drm_i915_private *i915 =
+ container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
+ return to_gt(i915)->ggtt;
+}
+
+static void reloc_cache_unmap(struct reloc_cache *cache)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ vaddr = unmask_page(cache->vaddr);
+ if (cache->vaddr & KMAP)
+ kunmap_atomic(vaddr);
+ else
+ io_mapping_unmap_atomic((void __iomem *)vaddr);
+}
+
+static void reloc_cache_remap(struct reloc_cache *cache,
+ struct drm_i915_gem_object *obj)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ if (cache->vaddr & KMAP) {
+ struct page *page = i915_gem_object_get_page(obj, cache->page);
+
+ vaddr = kmap_atomic(page);
+ cache->vaddr = unmask_flags(cache->vaddr) |
+ (unsigned long)vaddr;
+ } else {
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ unsigned long offset;
+
+ offset = cache->node.start;
+ if (!drm_mm_node_allocated(&cache->node))
+ offset += cache->page << PAGE_SHIFT;
+
+ cache->vaddr = (unsigned long)
+ io_mapping_map_atomic_wc(&ggtt->iomap, offset);
+ }
+}
+
+static void reloc_cache_reset(struct reloc_cache *cache, struct i915_execbuffer *eb)
+{
+ void *vaddr;
+
+ if (!cache->vaddr)
+ return;
+
+ vaddr = unmask_page(cache->vaddr);
+ if (cache->vaddr & KMAP) {
+ struct drm_i915_gem_object *obj =
+ (struct drm_i915_gem_object *)cache->node.mm;
+ if (cache->vaddr & CLFLUSH_AFTER)
+ mb();
+
+ kunmap_atomic(vaddr);
+ i915_gem_object_finish_access(obj);
+ } else {
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+ io_mapping_unmap_atomic((void __iomem *)vaddr);
+
+ if (drm_mm_node_allocated(&cache->node)) {
+ ggtt->vm.clear_range(&ggtt->vm,
+ cache->node.start,
+ cache->node.size);
+ mutex_lock(&ggtt->vm.mutex);
+ drm_mm_remove_node(&cache->node);
+ mutex_unlock(&ggtt->vm.mutex);
+ } else {
+ i915_vma_unpin((struct i915_vma *)cache->node.mm);
+ }
+ }
+
+ cache->vaddr = 0;
+ cache->page = -1;
+}
+
+static void *reloc_kmap(struct drm_i915_gem_object *obj,
+ struct reloc_cache *cache,
+ unsigned long pageno)
+{
+ void *vaddr;
+ struct page *page;
+
+ if (cache->vaddr) {
+ kunmap_atomic(unmask_page(cache->vaddr));
+ } else {
+ unsigned int flushes;
+ int err;
+
+ err = i915_gem_object_prepare_write(obj, &flushes);
+ if (err)
+ return ERR_PTR(err);
+
+ BUILD_BUG_ON(KMAP & CLFLUSH_FLAGS);
+ BUILD_BUG_ON((KMAP | CLFLUSH_FLAGS) & PAGE_MASK);
+
+ cache->vaddr = flushes | KMAP;
+ cache->node.mm = (void *)obj;
+ if (flushes)
+ mb();
+ }
+
+ page = i915_gem_object_get_page(obj, pageno);
+ if (!obj->mm.dirty)
+ set_page_dirty(page);
+
+ vaddr = kmap_atomic(page);
+ cache->vaddr = unmask_flags(cache->vaddr) | (unsigned long)vaddr;
+ cache->page = pageno;
+
+ return vaddr;
+}
+
+static void *reloc_iomap(struct i915_vma *batch,
+ struct i915_execbuffer *eb,
+ unsigned long page)
+{
+ struct drm_i915_gem_object *obj = batch->obj;
+ struct reloc_cache *cache = &eb->reloc_cache;
+ struct i915_ggtt *ggtt = cache_to_ggtt(cache);
+ unsigned long offset;
+ void *vaddr;
+
+ if (cache->vaddr) {
+ intel_gt_flush_ggtt_writes(ggtt->vm.gt);
+ io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
+ } else {
+ struct i915_vma *vma = ERR_PTR(-ENODEV);
+ int err;
+
+ if (i915_gem_object_is_tiled(obj))
+ return ERR_PTR(-EINVAL);
+
+ if (use_cpu_reloc(cache, obj))
+ return NULL;
+
+ err = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (err)
+ return ERR_PTR(err);
+
+ /*
+ * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
+ * VMA from the object list because we no longer pin.
+ *
+ * Only attempt to pin the batch buffer to ggtt if the current batch
+ * is not inside ggtt, or the batch buffer is not misplaced.
+ */
+ if (!i915_is_ggtt(batch->vm) ||
+ !i915_vma_misplaced(batch, 0, 0, PIN_MAPPABLE)) {
+ vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ }
+
+ if (vma == ERR_PTR(-EDEADLK))
+ return vma;
+
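+ /*
+ * If the object could not be pinned into the mappable aperture,
+ * fall back to reserving a single-page GGTT slot and inserting
+ * each page on demand below.
+ */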
+ if (IS_ERR(vma)) {
+ memset(&cache->node, 0, sizeof(cache->node));
+ mutex_lock(&ggtt->vm.mutex);
+ err = drm_mm_insert_node_in_range
+ (&ggtt->vm.mm, &cache->node,
+ PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+ 0, ggtt->mappable_end,
+ DRM_MM_INSERT_LOW);
+ mutex_unlock(&ggtt->vm.mutex);
+ if (err) /* no inactive aperture space, use cpu reloc */
+ return NULL;
+ } else {
+ cache->node.start = i915_ggtt_offset(vma);
+ cache->node.mm = (void *)vma;
+ }
+ }
+
+ offset = cache->node.start;
+ if (drm_mm_node_allocated(&cache->node)) {
+ ggtt->vm.insert_page(&ggtt->vm,
+ i915_gem_object_get_dma_address(obj, page),
+ offset,
+ i915_gem_get_pat_index(ggtt->vm.i915,
+ I915_CACHE_NONE),
+ 0);
+ } else {
+ offset += page << PAGE_SHIFT;
+ }
+
+ vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap,
+ offset);
+ cache->page = page;
+ cache->vaddr = (unsigned long)vaddr;
+
+ return vaddr;
+}
+
+static void *reloc_vaddr(struct i915_vma *vma,
+ struct i915_execbuffer *eb,
+ unsigned long page)
+{
+ struct reloc_cache *cache = &eb->reloc_cache;
+ void *vaddr;
+
+ if (cache->page == page) {
+ vaddr = unmask_page(cache->vaddr);
+ } else {
+ vaddr = NULL;
+ if ((cache->vaddr & KMAP) == 0)
+ vaddr = reloc_iomap(vma, eb, page);
+ if (!vaddr)
+ vaddr = reloc_kmap(vma->obj, cache, page);
+ }
+
+ return vaddr;
+}
+
+static void clflush_write32(u32 *addr, u32 value, unsigned int flushes)
+{
+ if (unlikely(flushes & (CLFLUSH_BEFORE | CLFLUSH_AFTER))) {
+ if (flushes & CLFLUSH_BEFORE)
+ drm_clflush_virt_range(addr, sizeof(*addr));
+
+ *addr = value;
+
+ /*
+ * Writes to the same cacheline are serialised by the CPU
+ * (including clflush). On the write path, we only require
+ * that it hits memory in an orderly fashion and place
+ * mb barriers at the start and end of the relocation phase
+ * to ensure ordering of clflush wrt to the system.
+ */
+ if (flushes & CLFLUSH_AFTER)
+ drm_clflush_virt_range(addr, sizeof(*addr));
+ } else
+ *addr = value;
+}
+
+static u64
+relocate_entry(struct i915_vma *vma,
+ const struct drm_i915_gem_relocation_entry *reloc,
+ struct i915_execbuffer *eb,
+ const struct i915_vma *target)
+{
+ u64 target_addr = relocation_target(reloc, target);
+ u64 offset = reloc->offset;
+ bool wide = eb->reloc_cache.use_64bit_reloc;
+ void *vaddr;
+
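+ /*
+ * 64-bit relocations are written as two 32-bit dwords: the low
+ * dword first, then we advance the offset by a dword and loop
+ * once more for the high dword with wide cleared.
+ */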
+repeat:
+ vaddr = reloc_vaddr(vma, eb,
+ offset >> PAGE_SHIFT);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ GEM_BUG_ON(!IS_ALIGNED(offset, sizeof(u32)));
+ clflush_write32(vaddr + offset_in_page(offset),
+ lower_32_bits(target_addr),
+ eb->reloc_cache.vaddr);
+
+ if (wide) {
+ offset += sizeof(u32);
+ target_addr >>= 32;
+ wide = false;
+ goto repeat;
+ }
+
+ return target->node.start | UPDATE;
+}
+
+static u64
+eb_relocate_entry(struct i915_execbuffer *eb,
+ struct eb_vma *ev,
+ const struct drm_i915_gem_relocation_entry *reloc)
+{
+ struct drm_i915_private *i915 = eb->i915;
+ struct eb_vma *target;
+ int err;
+
+ /* We already hold a reference to all valid objects */
+ target = eb_get_vma(eb, reloc->target_handle);
+ if (unlikely(!target))
+ return -ENOENT;
+
+ /* Validate that the target is in a valid r/w GPU domain */
+ if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
+ drm_dbg(&i915->drm, "reloc with multiple write domains: "
+ "target %d offset %d "
+ "read %08x write %08x",
+ reloc->target_handle,
+ (int) reloc->offset,
+ reloc->read_domains,
+ reloc->write_domain);
+ return -EINVAL;
+ }
+ if (unlikely((reloc->write_domain | reloc->read_domains)
+ & ~I915_GEM_GPU_DOMAINS)) {
+ drm_dbg(&i915->drm, "reloc with read/write non-GPU domains: "
+ "target %d offset %d "
+ "read %08x write %08x",
+ reloc->target_handle,
+ (int) reloc->offset,
+ reloc->read_domains,
+ reloc->write_domain);
+ return -EINVAL;
+ }
+
+ if (reloc->write_domain) {
+ target->flags |= EXEC_OBJECT_WRITE;
+
+ /*
+ * Sandybridge PPGTT errata: We need a global gtt mapping
+ * for MI and pipe_control writes because the gpu doesn't
+ * properly redirect them through the ppgtt for non_secure
+ * batchbuffers.
+ */
+ if (reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
+ GRAPHICS_VER(eb->i915) == 6 &&
+ !i915_vma_is_bound(target->vma, I915_VMA_GLOBAL_BIND)) {
+ struct i915_vma *vma = target->vma;
+
+ reloc_cache_unmap(&eb->reloc_cache);
+ mutex_lock(&vma->vm->mutex);
+ err = i915_vma_bind(target->vma,
+ target->vma->obj->pat_index,
+ PIN_GLOBAL, NULL, NULL);
+ mutex_unlock(&vma->vm->mutex);
+ reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
+ if (err)
+ return err;
+ }
+ }
+
+ /*
+ * If the relocation already has the right value in it, no
+ * more work needs to be done.
+ */
+ if (!DBG_FORCE_RELOC &&
+ gen8_canonical_addr(i915_vma_offset(target->vma)) == reloc->presumed_offset)
+ return 0;
+
+ /* Check that the relocation address is valid... */
+ if (unlikely(reloc->offset >
+ ev->vma->size - (eb->reloc_cache.use_64bit_reloc ? 8 : 4))) {
+ drm_dbg(&i915->drm, "Relocation beyond object bounds: "
+ "target %d offset %d size %d.\n",
+ reloc->target_handle,
+ (int)reloc->offset,
+ (int)ev->vma->size);
+ return -EINVAL;
+ }
+ if (unlikely(reloc->offset & 3)) {
+ drm_dbg(&i915->drm, "Relocation not 4-byte aligned: "
+ "target %d offset %d.\n",
+ reloc->target_handle,
+ (int)reloc->offset);
+ return -EINVAL;
+ }
+
+ /*
+ * If we write into the object, we need to force the synchronisation
+ * barrier, either with an asynchronous clflush or if we executed the
+ * patching using the GPU (though that should be serialised by the
+ * timeline). To be completely sure, and since we are required to
+ * do relocations we are already stalling, disable the user's opt
+ * out of our synchronisation.
+ */
+ ev->flags &= ~EXEC_OBJECT_ASYNC;
+
+ /* and update the user's relocation entry */
+ return relocate_entry(ev->vma, reloc, eb, target->vma);
+}
+
+static int eb_relocate_vma(struct i915_execbuffer *eb, struct eb_vma *ev)
+{
+#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
+ struct drm_i915_gem_relocation_entry stack[N_RELOC(512)];
+ const struct drm_i915_gem_exec_object2 *entry = ev->exec;
+ struct drm_i915_gem_relocation_entry __user *urelocs =
+ u64_to_user_ptr(entry->relocs_ptr);
+ unsigned long remain = entry->relocation_count;
+
+ if (unlikely(remain > N_RELOC(ULONG_MAX)))
+ return -EINVAL;
+
+ /*
+ * We must check that the entire relocation array is safe
+ * to read. However, if the array is not writable the user loses
+ * the updated relocation values.
+ */
+ if (unlikely(!access_ok(urelocs, remain * sizeof(*urelocs))))
+ return -EFAULT;
+
+ do {
+ struct drm_i915_gem_relocation_entry *r = stack;
+ unsigned int count =
+ min_t(unsigned long, remain, ARRAY_SIZE(stack));
+ unsigned int copied;
+
+ /*
+ * This is the fast path and we cannot handle a pagefault
+ * whilst holding the struct mutex lest the user pass in the
+ * relocations contained within a mmaped bo. In such a case
+ * the page fault handler would call i915_gem_fault() and
+ * we would try to acquire the struct mutex again. Obviously
+ * this is bad and so lockdep complains vehemently.
+ */
+ pagefault_disable();
+ copied = __copy_from_user_inatomic(r, urelocs, count * sizeof(r[0]));
+ pagefault_enable();
+ if (unlikely(copied)) {
+ remain = -EFAULT;
+ goto out;
+ }
+
+ remain -= count;
+ do {
+ u64 offset = eb_relocate_entry(eb, ev, r);
+
+ if (likely(offset == 0)) {
+ } else if ((s64)offset < 0) {
+ remain = (int)offset;
+ goto out;
+ } else {
+ /*
+ * Note that reporting an error now
+ * leaves everything in an inconsistent
+ * state as we have *already* changed
+ * the relocation value inside the
+ * object. Since we have not updated the
+ * reloc.presumed_offset and will not
+ * change the execobject.offset, on a
+ * subsequent call we may not rewrite the
+ * value inside the object, leaving it
+ * dangling and causing a GPU hang, unless
+ * userspace dynamically rebuilds the
+ * relocations on each execbuf rather than
+ * presuming a static tree.
+ *
+ * We did previously check if the relocations
+ * were writable (access_ok), an error now
+ * would be a strange race with mprotect,
+ * having already demonstrated that we
+ * can read from this userspace address.
+ */
+ offset = gen8_canonical_addr(offset & ~UPDATE);
+ __put_user(offset,
+ &urelocs[r - stack].presumed_offset);
+ }
+ } while (r++, --count);
+ urelocs += ARRAY_SIZE(stack);
+ } while (remain);
+out:
+ reloc_cache_reset(&eb->reloc_cache, eb);
+ return remain;
+}
+
+static int
+eb_relocate_vma_slow(struct i915_execbuffer *eb, struct eb_vma *ev)
+{
+ const struct drm_i915_gem_exec_object2 *entry = ev->exec;
+ struct drm_i915_gem_relocation_entry *relocs =
+ u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < entry->relocation_count; i++) {
+ u64 offset = eb_relocate_entry(eb, ev, &relocs[i]);
+
+ if ((s64)offset < 0) {
+ err = (int)offset;
+ goto err;
+ }
+ }
+ err = 0;
+err:
+ reloc_cache_reset(&eb->reloc_cache, eb);
+ return err;
+}
+
+static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
+{
+ const char __user *addr, *end;
+ unsigned long size;
+ char __maybe_unused c;
+
+ size = entry->relocation_count;
+ if (size == 0)
+ return 0;
+
+ if (size > N_RELOC(ULONG_MAX))
+ return -EINVAL;
+
+ addr = u64_to_user_ptr(entry->relocs_ptr);
+ size *= sizeof(struct drm_i915_gem_relocation_entry);
+ if (!access_ok(addr, size))
+ return -EFAULT;
+
+ end = addr + size;
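+ /*
+ * Touch one byte per page (and the final byte) to both validate
+ * the user pointer and prefault the relocation array.
+ */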
+ for (; addr < end; addr += PAGE_SIZE) {
+ int err = __get_user(c, addr);
+ if (err)
+ return err;
+ }
+ return __get_user(c, end - 1);
+}
+
+static int eb_copy_relocations(const struct i915_execbuffer *eb)
+{
+ struct drm_i915_gem_relocation_entry *relocs;
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < count; i++) {
+ const unsigned int nreloc = eb->exec[i].relocation_count;
+ struct drm_i915_gem_relocation_entry __user *urelocs;
+ unsigned long size;
+ unsigned long copied;
+
+ if (nreloc == 0)
+ continue;
+
+ err = check_relocations(&eb->exec[i]);
+ if (err)
+ goto err;
+
+ urelocs = u64_to_user_ptr(eb->exec[i].relocs_ptr);
+ size = nreloc * sizeof(*relocs);
+
+ relocs = kvmalloc_array(size, 1, GFP_KERNEL);
+ if (!relocs) {
+ err = -ENOMEM;
+ goto err;
+ }
+
+ /* copy_from_user is limited to < 4GiB */
+ copied = 0;
+ do {
+ unsigned int len =
+ min_t(u64, BIT_ULL(31), size - copied);
+
+ if (__copy_from_user((char *)relocs + copied,
+ (char __user *)urelocs + copied,
+ len))
+ goto end;
+
+ copied += len;
+ } while (copied < size);
+
+ /*
+ * As we do not update the known relocation offsets after
+ * relocating (due to the complexities in lock handling),
+ * we need to mark them as invalid now so that we force the
+ * relocation processing next time. Just in case the target
+ * object is evicted and then rebound into its old
+ * presumed_offset before the next execbuffer - if that
+ * happened we would make the mistake of assuming that the
+ * relocations were valid.
+ */
+ if (!user_access_begin(urelocs, size))
+ goto end;
+
+ for (copied = 0; copied < nreloc; copied++)
+ unsafe_put_user(-1,
+ &urelocs[copied].presumed_offset,
+ end_user);
+ user_access_end();
+
+ eb->exec[i].relocs_ptr = (uintptr_t)relocs;
+ }
+
+ return 0;
+
+end_user:
+ user_access_end();
+end:
+ kvfree(relocs);
+ err = -EFAULT;
+err:
+ while (i--) {
+ relocs = u64_to_ptr(typeof(*relocs), eb->exec[i].relocs_ptr);
+ if (eb->exec[i].relocation_count)
+ kvfree(relocs);
+ }
+ return err;
+}
+
+static int eb_prefault_relocations(const struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ int err;
+
+ err = check_relocations(&eb->exec[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int eb_reinit_userptr(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+ int ret;
+
+ if (likely(!(eb->args->flags & __EXEC_USERPTR_USED)))
+ return 0;
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+
+ if (!i915_gem_object_is_userptr(ev->vma->obj))
+ continue;
+
+ ret = i915_gem_object_userptr_submit_init(ev->vma->obj);
+ if (ret)
+ return ret;
+
+ ev->flags |= __EXEC_OBJECT_USERPTR_INIT;
+ }
+
+ return 0;
+}
+
+static noinline int eb_relocate_parse_slow(struct i915_execbuffer *eb)
+{
+ bool have_copy = false;
+ struct eb_vma *ev;
+ int err = 0;
+
+repeat:
+ if (signal_pending(current)) {
+ err = -ERESTARTSYS;
+ goto out;
+ }
+
+ /* We may process another execbuffer during the unlock... */
+ eb_release_vmas(eb, false);
+ i915_gem_ww_ctx_fini(&eb->ww);
+
+ /*
+ * We take 3 passes through the slowpath.
+ *
+ * 1 - we try to just prefault all the user relocation entries and
+ * then attempt to reuse the atomic pagefault disabled fast path again.
+ *
+ * 2 - we copy the user entries to a local buffer here outside of the
+ * lock and allow ourselves to wait upon any rendering before
+ * relocations
+ *
+ * 3 - we already have a local copy of the relocation entries, but
+ * were interrupted (EAGAIN) whilst waiting for the objects, try again.
+ */
+ if (!err) {
+ err = eb_prefault_relocations(eb);
+ } else if (!have_copy) {
+ err = eb_copy_relocations(eb);
+ have_copy = err == 0;
+ } else {
+ cond_resched();
+ err = 0;
+ }
+
+ if (!err)
+ err = eb_reinit_userptr(eb);
+
+ i915_gem_ww_ctx_init(&eb->ww, true);
+ if (err)
+ goto out;
+
+ /* reacquire the objects */
+repeat_validate:
+ err = eb_pin_engine(eb, false);
+ if (err)
+ goto err;
+
+ err = eb_validate_vmas(eb);
+ if (err)
+ goto err;
+
+ GEM_BUG_ON(!eb->batches[0]);
+
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ if (!have_copy) {
+ err = eb_relocate_vma(eb, ev);
+ if (err)
+ break;
+ } else {
+ err = eb_relocate_vma_slow(eb, ev);
+ if (err)
+ break;
+ }
+ }
+
+ if (err == -EDEADLK)
+ goto err;
+
+ if (err && !have_copy)
+ goto repeat;
+
+ if (err)
+ goto err;
+
+ /* as last step, parse the command buffer */
+ err = eb_parse(eb);
+ if (err)
+ goto err;
+
+ /*
+ * Leave the user relocations as they are; this is the painfully slow path,
+ * and we want to avoid the complication of dropping the lock whilst
+ * having buffers reserved in the aperture and so causing spurious
+ * ENOSPC for random operations.
+ */
+
+err:
+ if (err == -EDEADLK) {
+ eb_release_vmas(eb, false);
+ err = i915_gem_ww_ctx_backoff(&eb->ww);
+ if (!err)
+ goto repeat_validate;
+ }
+
+ if (err == -EAGAIN)
+ goto repeat;
+
+out:
+ if (have_copy) {
+ const unsigned int count = eb->buffer_count;
+ unsigned int i;
+
+ for (i = 0; i < count; i++) {
+ const struct drm_i915_gem_exec_object2 *entry =
+ &eb->exec[i];
+ struct drm_i915_gem_relocation_entry *relocs;
+
+ if (!entry->relocation_count)
+ continue;
+
+ relocs = u64_to_ptr(typeof(*relocs), entry->relocs_ptr);
+ kvfree(relocs);
+ }
+ }
+
+ return err;
+}
+
+static int eb_relocate_parse(struct i915_execbuffer *eb)
+{
+ int err;
+ bool throttle = true;
+
+retry:
+ err = eb_pin_engine(eb, throttle);
+ if (err) {
+ if (err != -EDEADLK)
+ return err;
+
+ goto err;
+ }
+
+ /* only throttle once, even if we didn't need to throttle */
+ throttle = false;
+
+ err = eb_validate_vmas(eb);
+ if (err == -EAGAIN)
+ goto slow;
+ else if (err)
+ goto err;
+
+ /* The objects are in their final locations, apply the relocations. */
+ if (eb->args->flags & __EXEC_HAS_RELOC) {
+ struct eb_vma *ev;
+
+ list_for_each_entry(ev, &eb->relocs, reloc_link) {
+ err = eb_relocate_vma(eb, ev);
+ if (err)
+ break;
+ }
+
+ if (err == -EDEADLK)
+ goto err;
+ else if (err)
+ goto slow;
+ }
+
+ if (!err)
+ err = eb_parse(eb);
+
+err:
+ if (err == -EDEADLK) {
+ eb_release_vmas(eb, false);
+ err = i915_gem_ww_ctx_backoff(&eb->ww);
+ if (!err)
+ goto retry;
+ }
+
+ return err;
+
+slow:
+ err = eb_relocate_parse_slow(eb);
+ if (err)
+ /*
+ * If the user expects the execobject.offset and
+ * reloc.presumed_offset to be an exact match,
+ * as for using NO_RELOC, then we cannot update
+ * the execobject.offset until we have completed
+ * relocation.
+ */
+ eb->args->flags &= ~__EXEC_HAS_RELOC;
+
+ return err;
+}
+
+/*
+ * Two helper loops control the order in which requests / batches are created
+ * and added to the backend. Requests are created in order from the parent to
+ * the last child. Requests are added in the reverse order, from the last child
+ * to parent. This is done for locking reasons as the timeline lock is acquired
+ * during request creation and released when the request is added to the
+ * backend. To make lockdep happy (see intel_context_timeline_lock) this must be
+ * the ordering.
+ */
+#define for_each_batch_create_order(_eb, _i) \
+ for ((_i) = 0; (_i) < (_eb)->num_batches; ++(_i))
+#define for_each_batch_add_order(_eb, _i) \
+ BUILD_BUG_ON(!typecheck(int, _i)); \
+ for ((_i) = (_eb)->num_batches - 1; (_i) >= 0; --(_i))
+
+static struct i915_request *
+eb_find_first_request_added(struct i915_execbuffer *eb)
+{
+ int i;
+
+ for_each_batch_add_order(eb, i)
+ if (eb->requests[i])
+ return eb->requests[i];
+
+ GEM_BUG_ON("Request not found");
+
+ return NULL;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
+
+/* Stage with GFP_KERNEL allocations before we enter the signaling critical path */
+static int eb_capture_stage(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i = count, j;
+
+ while (i--) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+ unsigned int flags = ev->flags;
+
+ if (!(flags & EXEC_OBJECT_CAPTURE))
+ continue;
+
+ if (i915_gem_context_is_recoverable(eb->gem_context) &&
+ (IS_DGFX(eb->i915) || GRAPHICS_VER_FULL(eb->i915) > IP_VER(12, 0)))
+ return -EINVAL;
+
+ for_each_batch_create_order(eb, j) {
+ struct i915_capture_list *capture;
+
+ capture = kmalloc(sizeof(*capture), GFP_KERNEL);
+ if (!capture)
+ continue;
+
+ capture->next = eb->capture_lists[j];
+ capture->vma_res = i915_vma_resource_get(vma->resource);
+ eb->capture_lists[j] = capture;
+ }
+ }
+
+ return 0;
+}
+
+/* Commit once we're in the critical path */
+static void eb_capture_commit(struct i915_execbuffer *eb)
+{
+ unsigned int j;
+
+ for_each_batch_create_order(eb, j) {
+ struct i915_request *rq = eb->requests[j];
+
+ if (!rq)
+ break;
+
+ rq->capture_list = eb->capture_lists[j];
+ eb->capture_lists[j] = NULL;
+ }
+}
+
+/*
+ * Release anything that didn't get committed due to errors.
+ * The capture_list will otherwise be freed at request retire.
+ */
+static void eb_capture_release(struct i915_execbuffer *eb)
+{
+ unsigned int j;
+
+ for_each_batch_create_order(eb, j) {
+ if (eb->capture_lists[j]) {
+ i915_request_free_capture_list(eb->capture_lists[j]);
+ eb->capture_lists[j] = NULL;
+ }
+ }
+}
+
+static void eb_capture_list_clear(struct i915_execbuffer *eb)
+{
+ memset(eb->capture_lists, 0, sizeof(eb->capture_lists));
+}
+
+#else
+
+static int eb_capture_stage(struct i915_execbuffer *eb)
+{
+ return 0;
+}
+
+static void eb_capture_commit(struct i915_execbuffer *eb)
+{
+}
+
+static void eb_capture_release(struct i915_execbuffer *eb)
+{
+}
+
+static void eb_capture_list_clear(struct i915_execbuffer *eb)
+{
+}
+
+#endif
+
+static int eb_move_to_gpu(struct i915_execbuffer *eb)
+{
+ const unsigned int count = eb->buffer_count;
+ unsigned int i = count;
+ int err = 0, j;
+
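+ /*
+ * For each object: flush dirty CPU caches the GPU cannot snoop,
+ * serialise against the first request unless EXEC_OBJECT_ASYNC was
+ * requested, and mark the vma active on every request in this
+ * submission.
+ */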
+ while (i--) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct i915_vma *vma = ev->vma;
+ unsigned int flags = ev->flags;
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ assert_vma_held(vma);
+
+ /*
+ * If the GPU is not _reading_ through the CPU cache, we need
+ * to make sure that any writes (both previous GPU writes from
+ * before a change in snooping levels and normal CPU writes)
+ * caught in that cache are flushed to main memory.
+ *
+ * We want to say
+ * obj->cache_dirty &&
+ * !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)
+ * but gcc's optimiser doesn't handle that as well and emits
+ * two jumps instead of one. Maybe one day...
+ *
+ * FIXME: There is also sync flushing in set_pages(), which
+ * serves a different purpose (some of the time at least).
+ *
+ * We should consider:
+ *
+ * 1. Rip out the async flush code.
+ *
+ * 2. Or make the sync flushing use the async clflush path
+ * using mandatory fences underneath. Currently the below
+ * async flush happens after we bind the object.
+ */
+ if (unlikely(obj->cache_dirty & ~obj->cache_coherent)) {
+ if (i915_gem_clflush_object(obj, 0))
+ flags &= ~EXEC_OBJECT_ASYNC;
+ }
+
+ /* We only need to await on the first request */
+ if (err == 0 && !(flags & EXEC_OBJECT_ASYNC)) {
+ err = i915_request_await_object
+ (eb_find_first_request_added(eb), obj,
+ flags & EXEC_OBJECT_WRITE);
+ }
+
+ for_each_batch_add_order(eb, j) {
+ if (err)
+ break;
+ if (!eb->requests[j])
+ continue;
+
+ err = _i915_vma_move_to_active(vma, eb->requests[j],
+ j ? NULL :
+ eb->composite_fence ?
+ eb->composite_fence :
+ &eb->requests[j]->fence,
+ flags | __EXEC_OBJECT_NO_RESERVE |
+ __EXEC_OBJECT_NO_REQUEST_AWAIT);
+ }
+ }
+
+#ifdef CONFIG_MMU_NOTIFIER
+ if (!err && (eb->args->flags & __EXEC_USERPTR_USED)) {
+ read_lock(&eb->i915->mm.notifier_lock);
+
+ /*
+ * count is always at least 1, otherwise __EXEC_USERPTR_USED
+ * could not have been set
+ */
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ struct drm_i915_gem_object *obj = ev->vma->obj;
+
+ if (!i915_gem_object_is_userptr(obj))
+ continue;
+
+ err = i915_gem_object_userptr_submit_done(obj);
+ if (err)
+ break;
+ }
+
+ read_unlock(&eb->i915->mm.notifier_lock);
+ }
+#endif
+
+ if (unlikely(err))
+ goto err_skip;
+
+ /* Unconditionally flush any chipset caches (for streaming writes). */
+ intel_gt_chipset_flush(eb->gt);
+ eb_capture_commit(eb);
+
+ return 0;
+
+err_skip:
+ for_each_batch_create_order(eb, j) {
+ if (!eb->requests[j])
+ break;
+
+ i915_request_set_error_once(eb->requests[j], err);
+ }
+ return err;
+}
+
+static int i915_gem_check_execbuffer(struct drm_i915_private *i915,
+ struct drm_i915_gem_execbuffer2 *exec)
+{
+ if (exec->flags & __I915_EXEC_ILLEGAL_FLAGS)
+ return -EINVAL;
+
+ /* Kernel clipping was a DRI1 misfeature */
+ if (!(exec->flags & (I915_EXEC_FENCE_ARRAY |
+ I915_EXEC_USE_EXTENSIONS))) {
+ if (exec->num_cliprects || exec->cliprects_ptr)
+ return -EINVAL;
+ }
+
+ if (exec->DR4 == 0xffffffff) {
+ drm_dbg(&i915->drm, "UXA submitting garbage DR4, fixing up\n");
+ exec->DR4 = 0;
+ }
+ if (exec->DR1 || exec->DR4)
+ return -EINVAL;
+
+ if ((exec->batch_start_offset | exec->batch_len) & 0x7)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int i915_reset_gen7_sol_offsets(struct i915_request *rq)
+{
+ u32 *cs;
+ int i;
+
+ if (GRAPHICS_VER(rq->i915) != 7 || rq->engine->id != RCS0) {
+ drm_dbg(&rq->i915->drm, "sol reset is gen7/rcs only\n");
+ return -EINVAL;
+ }
+
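+ /* Emit MI_LOAD_REGISTER_IMM to zero the four SO_WRITE_OFFSET registers. */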
+ cs = intel_ring_begin(rq, 4 * 2 + 2);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ *cs++ = MI_LOAD_REGISTER_IMM(4);
+ for (i = 0; i < 4; i++) {
+ *cs++ = i915_mmio_reg_offset(GEN7_SO_WRITE_OFFSET(i));
+ *cs++ = 0;
+ }
+ *cs++ = MI_NOOP;
+ intel_ring_advance(rq, cs);
+
+ return 0;
+}
+
+static struct i915_vma *
+shadow_batch_pin(struct i915_execbuffer *eb,
+ struct drm_i915_gem_object *obj,
+ struct i915_address_space *vm,
+ unsigned int flags)
+{
+ struct i915_vma *vma;
+ int err;
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma))
+ return vma;
+
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
+ if (err)
+ return ERR_PTR(err);
+
+ return vma;
+}
+
+static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+ /*
+ * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
+ * batch" bit. Hence we need to pin secure batches into the global gtt.
+ * hsw should have this fixed, but bdw mucks it up again.
+ */
+ if (eb->batch_flags & I915_DISPATCH_SECURE)
+ return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
+
+ return NULL;
+}
+
+static int eb_parse(struct i915_execbuffer *eb)
+{
+ struct drm_i915_private *i915 = eb->i915;
+ struct intel_gt_buffer_pool_node *pool = eb->batch_pool;
+ struct i915_vma *shadow, *trampoline, *batch;
+ unsigned long len;
+ int err;
+
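+ /*
+ * Without the cmdparser only secure-batch pinning needs handling.
+ * Otherwise the batch is scanned into a read-only shadow buffer
+ * taken from the gt buffer pool (with a trampoline when the parser
+ * runs from the GGTT), and the shadow replaces the user batch.
+ */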
+ if (!eb_use_cmdparser(eb)) {
+ batch = eb_dispatch_secure(eb, eb->batches[0]->vma);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ goto secure_batch;
+ }
+
+ if (intel_context_is_parallel(eb->context))
+ return -EINVAL;
+
+ len = eb->batch_len[0];
+ if (!CMDPARSER_USES_GGTT(eb->i915)) {
+ /*
+ * ppGTT backed shadow buffers must be mapped RO, to prevent
+ * post-scan tampering
+ */
+ if (!eb->context->vm->has_read_only) {
+ drm_dbg(&i915->drm,
+ "Cannot prevent post-scan tampering without RO capable vm\n");
+ return -EINVAL;
+ }
+ } else {
+ len += I915_CMD_PARSER_TRAMPOLINE_SIZE;
+ }
+ if (unlikely(len < eb->batch_len[0])) /* last paranoid check of overflow */
+ return -EINVAL;
+
+ if (!pool) {
+ pool = intel_gt_get_buffer_pool(eb->gt, len,
+ I915_MAP_WB);
+ if (IS_ERR(pool))
+ return PTR_ERR(pool);
+ eb->batch_pool = pool;
+ }
+
+ err = i915_gem_object_lock(pool->obj, &eb->ww);
+ if (err)
+ return err;
+
+ shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
+ intel_gt_buffer_pool_mark_used(pool);
+ i915_gem_object_set_readonly(shadow->obj);
+ shadow->private = pool;
+
+ trampoline = NULL;
+ if (CMDPARSER_USES_GGTT(eb->i915)) {
+ trampoline = shadow;
+
+ shadow = shadow_batch_pin(eb, pool->obj,
+ &eb->gt->ggtt->vm,
+ PIN_GLOBAL);
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
+ shadow->private = pool;
+
+ eb->batch_flags |= I915_DISPATCH_SECURE;
+ }
+
+ batch = eb_dispatch_secure(eb, shadow);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ err = dma_resv_reserve_fences(shadow->obj->base.resv, 1);
+ if (err)
+ return err;
+
+ err = intel_engine_cmd_parser(eb->context->engine,
+ eb->batches[0]->vma,
+ eb->batch_start_offset,
+ eb->batch_len[0],
+ shadow, trampoline);
+ if (err)
+ return err;
+
+ eb->batches[0] = &eb->vma[eb->buffer_count++];
+ eb->batches[0]->vma = i915_vma_get(shadow);
+ eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN;
+
+ eb->trampoline = trampoline;
+ eb->batch_start_offset = 0;
+
+secure_batch:
+ if (batch) {
+ if (intel_context_is_parallel(eb->context))
+ return -EINVAL;
+
+ eb->batches[0] = &eb->vma[eb->buffer_count++];
+ eb->batches[0]->flags = __EXEC_OBJECT_HAS_PIN;
+ eb->batches[0]->vma = i915_vma_get(batch);
+ }
+ return 0;
+}
+
+static int eb_request_submit(struct i915_execbuffer *eb,
+ struct i915_request *rq,
+ struct i915_vma *batch,
+ u64 batch_len)
+{
+ int err;
+
+ if (intel_context_nopreempt(rq->context))
+ __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags);
+
+ if (eb->args->flags & I915_EXEC_GEN7_SOL_RESET) {
+ err = i915_reset_gen7_sol_offsets(rq);
+ if (err)
+ return err;
+ }
+
+ /*
+ * After we have completed waiting for other engines (using HW semaphores)
+ * we can signal that this request/batch is ready to run. This
+ * allows us to determine if the batch is still waiting on the GPU
+ * or actually running by checking the breadcrumb.
+ */
+ if (rq->context->engine->emit_init_breadcrumb) {
+ err = rq->context->engine->emit_init_breadcrumb(rq);
+ if (err)
+ return err;
+ }
+
+ err = rq->context->engine->emit_bb_start(rq,
+ i915_vma_offset(batch) +
+ eb->batch_start_offset,
+ batch_len,
+ eb->batch_flags);
+ if (err)
+ return err;
+
+ if (eb->trampoline) {
+ GEM_BUG_ON(intel_context_is_parallel(rq->context));
+ GEM_BUG_ON(eb->batch_start_offset);
+ err = rq->context->engine->emit_bb_start(rq,
+ i915_vma_offset(eb->trampoline) +
+ batch_len, 0, 0);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int eb_submit(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+ int err;
+
+ err = eb_move_to_gpu(eb);
+
+ for_each_batch_create_order(eb, i) {
+ if (!eb->requests[i])
+ break;
+
+ trace_i915_request_queue(eb->requests[i], eb->batch_flags);
+ if (!err)
+ err = eb_request_submit(eb, eb->requests[i],
+ eb->batches[i]->vma,
+ eb->batch_len[i]);
+ }
+
+ return err;
+}
+
+/*
+ * Find one BSD ring to dispatch the corresponding BSD command.
+ * The engine index is returned.
+ */
+static unsigned int
+gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
+ struct drm_file *file)
+{
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+
+ /* Check whether the file_priv has already selected one ring. */
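+ /* If not, pick a video engine at random and remember it for this file. */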
+ if ((int)file_priv->bsd_engine < 0)
+ file_priv->bsd_engine =
+ get_random_u32_below(dev_priv->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO]);
+
+ return file_priv->bsd_engine;
+}
+
+static const enum intel_engine_id user_ring_map[] = {
+ [I915_EXEC_DEFAULT] = RCS0,
+ [I915_EXEC_RENDER] = RCS0,
+ [I915_EXEC_BLT] = BCS0,
+ [I915_EXEC_BSD] = VCS0,
+ [I915_EXEC_VEBOX] = VECS0
+};
+
+static struct i915_request *eb_throttle(struct i915_execbuffer *eb, struct intel_context *ce)
+{
+ struct intel_ring *ring = ce->ring;
+ struct intel_timeline *tl = ce->timeline;
+ struct i915_request *rq;
+
+ /*
+ * Completely unscientific finger-in-the-air estimates for suitable
+ * maximum user request size (to avoid blocking) and then backoff.
+ */
+ if (intel_ring_update_space(ring) >= PAGE_SIZE)
+ return NULL;
+
+ /*
+ * Find a request after which, once waited upon, at least half of
+ * the ring will be available. The hysteresis allows us to compete for the
+ * shared ring and should mean that we sleep less often prior to
+ * claiming our resources, but not so long that the ring completely
+ * drains before we can submit our next request.
+ */
+ list_for_each_entry(rq, &tl->requests, link) {
+ if (rq->ring != ring)
+ continue;
+
+ if (__intel_ring_space(rq->postfix,
+ ring->emit, ring->size) > ring->size / 2)
+ break;
+ }
+ if (&rq->link == &tl->requests)
+ return NULL; /* weird, we will check again later for real */
+
+ return i915_request_get(rq);
+}
+
+static int eb_pin_timeline(struct i915_execbuffer *eb, struct intel_context *ce,
+ bool throttle)
+{
+ struct intel_timeline *tl;
+ struct i915_request *rq = NULL;
+
+ /*
+ * Take a local wakeref for preparing to dispatch the execbuf as
+ * we expect to access the hardware fairly frequently in the
+ * process, and require the engine to be kept awake between accesses.
+ * Upon dispatch, we acquire another prolonged wakeref that we hold
+ * until the timeline is idle, which in turn releases the wakeref
+ * taken on the engine, and the parent device.
+ */
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ intel_context_enter(ce);
+ if (throttle)
+ rq = eb_throttle(eb, ce);
+ intel_context_timeline_unlock(tl);
+
+ if (rq) {
+ bool nonblock = eb->file->filp->f_flags & O_NONBLOCK;
+ long timeout = nonblock ? 0 : MAX_SCHEDULE_TIMEOUT;
+
+ if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE,
+ timeout) < 0) {
+ i915_request_put(rq);
+
+ /*
+ * Error path, cannot use intel_context_timeline_lock as
+ * that is user interruptible and this clean-up step
+ * must be done.
+ */
+ mutex_lock(&ce->timeline->mutex);
+ intel_context_exit(ce);
+ mutex_unlock(&ce->timeline->mutex);
+
+ if (nonblock)
+ return -EWOULDBLOCK;
+ else
+ return -EINTR;
+ }
+ i915_request_put(rq);
+ }
+
+ return 0;
+}
+
+static int eb_pin_engine(struct i915_execbuffer *eb, bool throttle)
+{
+ struct intel_context *ce = eb->context, *child;
+ int err;
+ int i = 0, j = 0;
+
+ GEM_BUG_ON(eb->args->flags & __EXEC_ENGINE_PINNED);
+
+ if (unlikely(intel_context_is_banned(ce)))
+ return -EIO;
+
+ /*
+ * Pinning the contexts may generate requests in order to acquire
+ * GGTT space, so do this first before we reserve a seqno for
+ * ourselves.
+ */
+ err = intel_context_pin_ww(ce, &eb->ww);
+ if (err)
+ return err;
+ for_each_child(ce, child) {
+ err = intel_context_pin_ww(child, &eb->ww);
+ GEM_BUG_ON(err); /* perma-pinned should incr a counter */
+ }
+
+ for_each_child(ce, child) {
+ err = eb_pin_timeline(eb, child, throttle);
+ if (err)
+ goto unwind;
+ ++i;
+ }
+ err = eb_pin_timeline(eb, ce, throttle);
+ if (err)
+ goto unwind;
+
+ eb->args->flags |= __EXEC_ENGINE_PINNED;
+ return 0;
+
+unwind:
+ for_each_child(ce, child) {
+ if (j++ < i) {
+ mutex_lock(&child->timeline->mutex);
+ intel_context_exit(child);
+ mutex_unlock(&child->timeline->mutex);
+ }
+ }
+ for_each_child(ce, child)
+ intel_context_unpin(child);
+ intel_context_unpin(ce);
+ return err;
+}
+
+static void eb_unpin_engine(struct i915_execbuffer *eb)
+{
+ struct intel_context *ce = eb->context, *child;
+
+ if (!(eb->args->flags & __EXEC_ENGINE_PINNED))
+ return;
+
+ eb->args->flags &= ~__EXEC_ENGINE_PINNED;
+
+ for_each_child(ce, child) {
+ mutex_lock(&child->timeline->mutex);
+ intel_context_exit(child);
+ mutex_unlock(&child->timeline->mutex);
+
+ intel_context_unpin(child);
+ }
+
+ mutex_lock(&ce->timeline->mutex);
+ intel_context_exit(ce);
+ mutex_unlock(&ce->timeline->mutex);
+
+ intel_context_unpin(ce);
+}
+
+static unsigned int
+eb_select_legacy_ring(struct i915_execbuffer *eb)
+{
+ struct drm_i915_private *i915 = eb->i915;
+ struct drm_i915_gem_execbuffer2 *args = eb->args;
+ unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
+
+ if (user_ring_id != I915_EXEC_BSD &&
+ (args->flags & I915_EXEC_BSD_MASK)) {
+ drm_dbg(&i915->drm,
+ "execbuf with non bsd ring but with invalid "
+ "bsd dispatch flags: %d\n", (int)(args->flags));
+ return -1;
+ }
+
+ if (user_ring_id == I915_EXEC_BSD &&
+ i915->engine_uabi_class_count[I915_ENGINE_CLASS_VIDEO] > 1) {
+ unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;
+
+ if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
+ bsd_idx = gen8_dispatch_bsd_engine(i915, eb->file);
+ } else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
+ bsd_idx <= I915_EXEC_BSD_RING2) {
+ bsd_idx >>= I915_EXEC_BSD_SHIFT;
+ bsd_idx--;
+ } else {
+ drm_dbg(&i915->drm,
+ "execbuf with unknown bsd ring: %u\n",
+ bsd_idx);
+ return -1;
+ }
+
+ return _VCS(bsd_idx);
+ }
+
+ if (user_ring_id >= ARRAY_SIZE(user_ring_map)) {
+ drm_dbg(&i915->drm, "execbuf with unknown ring: %u\n",
+ user_ring_id);
+ return -1;
+ }
+
+ return user_ring_map[user_ring_id];
+}
+
+static int
+eb_select_engine(struct i915_execbuffer *eb)
+{
+ struct intel_context *ce, *child;
+ struct intel_gt *gt;
+ unsigned int idx;
+ int err;
+
+ if (i915_gem_context_user_engines(eb->gem_context))
+ idx = eb->args->flags & I915_EXEC_RING_MASK;
+ else
+ idx = eb_select_legacy_ring(eb);
+
+ ce = i915_gem_context_get_engine(eb->gem_context, idx);
+ if (IS_ERR(ce))
+ return PTR_ERR(ce);
+
+ if (intel_context_is_parallel(ce)) {
+ if (eb->buffer_count < ce->parallel.number_children + 1) {
+ intel_context_put(ce);
+ return -EINVAL;
+ }
+ if (eb->batch_start_offset || eb->args->batch_len) {
+ intel_context_put(ce);
+ return -EINVAL;
+ }
+ }
+ eb->num_batches = ce->parallel.number_children + 1;
+ gt = ce->engine->gt;
+
+ for_each_child(ce, child)
+ intel_context_get(child);
+ intel_gt_pm_get(gt);
+ /*
+ * Keep GT0 active on MTL so that i915_vma_parked() doesn't
+ * free VMAs while execbuf ioctl is validating VMAs.
+ */
+ if (gt->info.id)
+ intel_gt_pm_get(to_gt(gt->i915));
+
+ if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
+ err = intel_context_alloc_state(ce);
+ if (err)
+ goto err;
+ }
+ for_each_child(ce, child) {
+ if (!test_bit(CONTEXT_ALLOC_BIT, &child->flags)) {
+ err = intel_context_alloc_state(child);
+ if (err)
+ goto err;
+ }
+ }
+
+ /*
+ * ABI: Before userspace accesses the GPU (e.g. execbuffer), report
+ * EIO if the GPU is already wedged.
+ */
+ err = intel_gt_terminally_wedged(ce->engine->gt);
+ if (err)
+ goto err;
+
+ if (!i915_vm_tryget(ce->vm)) {
+ err = -ENOENT;
+ goto err;
+ }
+
+ eb->context = ce;
+ eb->gt = ce->engine->gt;
+
+ /*
+ * Make sure engine pool stays alive even if we call intel_context_put
+ * during ww handling. The pool is destroyed when the last pm reference
+ * is dropped, which breaks our -EDEADLK handling.
+ */
+ return err;
+
+err:
+ if (gt->info.id)
+ intel_gt_pm_put(to_gt(gt->i915));
+
+ intel_gt_pm_put(gt);
+ for_each_child(ce, child)
+ intel_context_put(child);
+ intel_context_put(ce);
+ return err;
+}
+
+static void
+eb_put_engine(struct i915_execbuffer *eb)
+{
+ struct intel_context *child;
+
+ i915_vm_put(eb->context->vm);
+ /*
+ * This works in conjunction with eb_select_engine() to prevent
+ * i915_vma_parked() from interfering while execbuf validates vmas.
+ */
+ if (eb->gt->info.id)
+ intel_gt_pm_put(to_gt(eb->gt->i915));
+ intel_gt_pm_put(eb->gt);
+ for_each_child(eb->context, child)
+ intel_context_put(child);
+ intel_context_put(eb->context);
+}
+
+static void
+__free_fence_array(struct eb_fence *fences, unsigned int n)
+{
+ while (n--) {
+ drm_syncobj_put(ptr_mask_bits(fences[n].syncobj, 2));
+ dma_fence_put(fences[n].dma_fence);
+ dma_fence_chain_free(fences[n].chain_fence);
+ }
+ kvfree(fences);
+}
+
+static int
+add_timeline_fence_array(struct i915_execbuffer *eb,
+ const struct drm_i915_gem_execbuffer_ext_timeline_fences *timeline_fences)
+{
+ struct drm_i915_gem_exec_fence __user *user_fences;
+ u64 __user *user_values;
+ struct eb_fence *f;
+ u64 nfences;
+ int err = 0;
+
+ nfences = timeline_fences->fence_count;
+ if (!nfences)
+ return 0;
+
+ /* Check multiplication overflow for access_ok() and kvmalloc_array() */
+ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+ if (nfences > min_t(unsigned long,
+ ULONG_MAX / sizeof(*user_fences),
+ SIZE_MAX / sizeof(*f)) - eb->num_fences)
+ return -EINVAL;
+
+ user_fences = u64_to_user_ptr(timeline_fences->handles_ptr);
+ if (!access_ok(user_fences, nfences * sizeof(*user_fences)))
+ return -EFAULT;
+
+ user_values = u64_to_user_ptr(timeline_fences->values_ptr);
+ if (!access_ok(user_values, nfences * sizeof(*user_values)))
+ return -EFAULT;
+
+ f = krealloc(eb->fences,
+ (eb->num_fences + nfences) * sizeof(*f),
+ __GFP_NOWARN | GFP_KERNEL);
+ if (!f)
+ return -ENOMEM;
+
+ eb->fences = f;
+ f += eb->num_fences;
+
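+ /*
+ * The known fence flags must fit into the low pointer bits left free
+ * by kmalloc alignment, as ptr_pack_bits() stores them alongside the
+ * syncobj pointer.
+ */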
+ BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+ ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+ while (nfences--) {
+ struct drm_i915_gem_exec_fence user_fence;
+ struct drm_syncobj *syncobj;
+ struct dma_fence *fence = NULL;
+ u64 point;
+
+ if (__copy_from_user(&user_fence,
+ user_fences++,
+ sizeof(user_fence)))
+ return -EFAULT;
+
+ if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
+ return -EINVAL;
+
+ if (__get_user(point, user_values++))
+ return -EFAULT;
+
+ syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+ if (!syncobj) {
+ drm_dbg(&eb->i915->drm,
+ "Invalid syncobj handle provided\n");
+ return -ENOENT;
+ }
+
+ fence = drm_syncobj_fence_get(syncobj);
+
+ if (!fence && user_fence.flags &&
+ !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle has no fence\n");
+ drm_syncobj_put(syncobj);
+ return -EINVAL;
+ }
+
+ if (fence)
+ err = dma_fence_chain_find_seqno(&fence, point);
+
+ if (err && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle missing requested point %llu\n",
+ point);
+ dma_fence_put(fence);
+ drm_syncobj_put(syncobj);
+ return err;
+ }
+
+ /*
+ * A point might have been signaled already and
+ * garbage collected from the timeline. In this case
+ * just ignore the point and carry on.
+ */
+ if (!fence && !(user_fence.flags & I915_EXEC_FENCE_SIGNAL)) {
+ drm_syncobj_put(syncobj);
+ continue;
+ }
+
+ /*
+ * For timeline syncobjs we need to preallocate chains for
+ * later signaling.
+ */
+ if (point != 0 && user_fence.flags & I915_EXEC_FENCE_SIGNAL) {
+ /*
+ * Waiting and signaling the same point (when point !=
+ * 0) would break the timeline.
+ */
+ if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+ drm_dbg(&eb->i915->drm,
+ "Trying to wait & signal the same timeline point.\n");
+ dma_fence_put(fence);
+ drm_syncobj_put(syncobj);
+ return -EINVAL;
+ }
+
+ f->chain_fence = dma_fence_chain_alloc();
+ if (!f->chain_fence) {
+ drm_syncobj_put(syncobj);
+ dma_fence_put(fence);
+ return -ENOMEM;
+ }
+ } else {
+ f->chain_fence = NULL;
+ }
+
+ f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+ f->dma_fence = fence;
+ f->value = point;
+ f++;
+ eb->num_fences++;
+ }
+
+ return 0;
+}
+
+static int add_fence_array(struct i915_execbuffer *eb)
+{
+ struct drm_i915_gem_execbuffer2 *args = eb->args;
+ struct drm_i915_gem_exec_fence __user *user;
+ unsigned long num_fences = args->num_cliprects;
+ struct eb_fence *f;
+
+ if (!(args->flags & I915_EXEC_FENCE_ARRAY))
+ return 0;
+
+ if (!num_fences)
+ return 0;
+
+ /* Check multiplication overflow for access_ok() and kvmalloc_array() */
+ BUILD_BUG_ON(sizeof(size_t) > sizeof(unsigned long));
+ if (num_fences > min_t(unsigned long,
+ ULONG_MAX / sizeof(*user),
+ SIZE_MAX / sizeof(*f) - eb->num_fences))
+ return -EINVAL;
+
+ user = u64_to_user_ptr(args->cliprects_ptr);
+ if (!access_ok(user, num_fences * sizeof(*user)))
+ return -EFAULT;
+
+ f = krealloc(eb->fences,
+ (eb->num_fences + num_fences) * sizeof(*f),
+ __GFP_NOWARN | GFP_KERNEL);
+ if (!f)
+ return -ENOMEM;
+
+ eb->fences = f;
+ f += eb->num_fences;
+ while (num_fences--) {
+ struct drm_i915_gem_exec_fence user_fence;
+ struct drm_syncobj *syncobj;
+ struct dma_fence *fence = NULL;
+
+ if (__copy_from_user(&user_fence, user++, sizeof(user_fence)))
+ return -EFAULT;
+
+ if (user_fence.flags & __I915_EXEC_FENCE_UNKNOWN_FLAGS)
+ return -EINVAL;
+
+ syncobj = drm_syncobj_find(eb->file, user_fence.handle);
+ if (!syncobj) {
+ drm_dbg(&eb->i915->drm,
+ "Invalid syncobj handle provided\n");
+ return -ENOENT;
+ }
+
+ if (user_fence.flags & I915_EXEC_FENCE_WAIT) {
+ fence = drm_syncobj_fence_get(syncobj);
+ if (!fence) {
+ drm_dbg(&eb->i915->drm,
+ "Syncobj handle has no fence\n");
+ drm_syncobj_put(syncobj);
+ return -EINVAL;
+ }
+ }
+
+ BUILD_BUG_ON(~(ARCH_KMALLOC_MINALIGN - 1) &
+ ~__I915_EXEC_FENCE_UNKNOWN_FLAGS);
+
+ f->syncobj = ptr_pack_bits(syncobj, user_fence.flags, 2);
+ f->dma_fence = fence;
+ f->value = 0;
+ f->chain_fence = NULL;
+ f++;
+ eb->num_fences++;
+ }
+
+ return 0;
+}
+
+static void put_fence_array(struct eb_fence *fences, int num_fences)
+{
+ if (fences)
+ __free_fence_array(fences, num_fences);
+}
+
+static int
+await_fence_array(struct i915_execbuffer *eb,
+ struct i915_request *rq)
+{
+ unsigned int n;
+ int err;
+
+ for (n = 0; n < eb->num_fences; n++) {
+ if (!eb->fences[n].dma_fence)
+ continue;
+
+ err = i915_request_await_dma_fence(rq, eb->fences[n].dma_fence);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+static void signal_fence_array(const struct i915_execbuffer *eb,
+ struct dma_fence * const fence)
+{
+ unsigned int n;
+
+ for (n = 0; n < eb->num_fences; n++) {
+ struct drm_syncobj *syncobj;
+ unsigned int flags;
+
+ syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
+ if (!(flags & I915_EXEC_FENCE_SIGNAL))
+ continue;
+
+ if (eb->fences[n].chain_fence) {
+ drm_syncobj_add_point(syncobj,
+ eb->fences[n].chain_fence,
+ fence,
+ eb->fences[n].value);
+ /*
+ * The chain's ownership is transferred to the
+ * timeline.
+ */
+ eb->fences[n].chain_fence = NULL;
+ } else {
+ drm_syncobj_replace_fence(syncobj, fence);
+ }
+ }
+}
+
+static int
+parse_timeline_fences(struct i915_user_extension __user *ext, void *data)
+{
+ struct i915_execbuffer *eb = data;
+ struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences;
+
+ if (copy_from_user(&timeline_fences, ext, sizeof(timeline_fences)))
+ return -EFAULT;
+
+ return add_timeline_fence_array(eb, &timeline_fences);
+}
+
+static void retire_requests(struct intel_timeline *tl, struct i915_request *end)
+{
+ struct i915_request *rq, *rn;
+
+ list_for_each_entry_safe(rq, rn, &tl->requests, link)
+ if (rq == end || !i915_request_retire(rq))
+ break;
+}
+
+static int eb_request_add(struct i915_execbuffer *eb, struct i915_request *rq,
+ int err, bool last_parallel)
+{
+ struct intel_timeline * const tl = i915_request_timeline(rq);
+ struct i915_sched_attr attr = {};
+ struct i915_request *prev;
+
+ lockdep_assert_held(&tl->mutex);
+ lockdep_unpin_lock(&tl->mutex, rq->cookie);
+
+ trace_i915_request_add(rq);
+
+ prev = __i915_request_commit(rq);
+
+ /* Check that the context wasn't destroyed before submission */
+ if (likely(!intel_context_is_closed(eb->context))) {
+ attr = eb->gem_context->sched;
+ } else {
+ /* Serialise with context_close via the add_to_timeline */
+ i915_request_set_error_once(rq, -ENOENT);
+ __i915_request_skip(rq);
+ err = -ENOENT; /* override any transient errors */
+ }
+
+ if (intel_context_is_parallel(eb->context)) {
+ if (err) {
+ __i915_request_skip(rq);
+ set_bit(I915_FENCE_FLAG_SKIP_PARALLEL,
+ &rq->fence.flags);
+ }
+ if (last_parallel)
+ set_bit(I915_FENCE_FLAG_SUBMIT_PARALLEL,
+ &rq->fence.flags);
+ }
+
+ __i915_request_queue(rq, &attr);
+
+ /* Try to clean up the client's timeline after submitting the request */
+ if (prev)
+ retire_requests(tl, prev);
+
+ mutex_unlock(&tl->mutex);
+
+ return err;
+}
+
+static int eb_requests_add(struct i915_execbuffer *eb, int err)
+{
+ int i;
+
+ /*
+	 * We iterate in reverse order of creation to release timeline mutexes
+	 * in the same order.
+ */
+ for_each_batch_add_order(eb, i) {
+ struct i915_request *rq = eb->requests[i];
+
+ if (!rq)
+ continue;
+ err |= eb_request_add(eb, rq, err, i == 0);
+ }
+
+ return err;
+}
+
+static const i915_user_extension_fn execbuf_extensions[] = {
+ [DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES] = parse_timeline_fences,
+};
+
+static int
+parse_execbuf2_extensions(struct drm_i915_gem_execbuffer2 *args,
+ struct i915_execbuffer *eb)
+{
+ if (!(args->flags & I915_EXEC_USE_EXTENSIONS))
+ return 0;
+
+ /* The execbuf2 extension mechanism reuses cliprects_ptr. So we cannot
+ * have another flag also using it at the same time.
+ */
+ if (eb->args->flags & I915_EXEC_FENCE_ARRAY)
+ return -EINVAL;
+
+ if (args->num_cliprects != 0)
+ return -EINVAL;
+
+ return i915_user_extensions(u64_to_user_ptr(args->cliprects_ptr),
+ execbuf_extensions,
+ ARRAY_SIZE(execbuf_extensions),
+ eb);
+}
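+
+/*
+ * Illustrative userspace usage (a hedged sketch, not part of this file):
+ * timeline fences are passed by chaining an extension off cliprects_ptr,
+ * with I915_EXEC_USE_EXTENSIONS set and num_cliprects left at zero:
+ *
+ *	struct drm_i915_gem_exec_fence fence = {
+ *		.handle = timeline_syncobj,
+ *		.flags = I915_EXEC_FENCE_SIGNAL,
+ *	};
+ *	__u64 point = 2;
+ *	struct drm_i915_gem_execbuffer_ext_timeline_fences ext = {
+ *		.base.name = DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES,
+ *		.fence_count = 1,
+ *		.handles_ptr = (uintptr_t)&fence,
+ *		.values_ptr = (uintptr_t)&point,
+ *	};
+ *
+ *	execbuf.flags |= I915_EXEC_USE_EXTENSIONS;
+ *	execbuf.cliprects_ptr = (uintptr_t)&ext;
+ *	execbuf.num_cliprects = 0;
+ *
+ * timeline_syncobj and execbuf stand in for application state; waiting on
+ * and signaling the same non-zero point in one call is rejected by
+ * add_timeline_fence_array().
+ */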
+
+static void eb_requests_get(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+
+ for_each_batch_create_order(eb, i) {
+ if (!eb->requests[i])
+ break;
+
+ i915_request_get(eb->requests[i]);
+ }
+}
+
+static void eb_requests_put(struct i915_execbuffer *eb)
+{
+ unsigned int i;
+
+ for_each_batch_create_order(eb, i) {
+ if (!eb->requests[i])
+ break;
+
+ i915_request_put(eb->requests[i]);
+ }
+}
+
+static struct sync_file *
+eb_composite_fence_create(struct i915_execbuffer *eb, int out_fence_fd)
+{
+ struct sync_file *out_fence = NULL;
+ struct dma_fence_array *fence_array;
+ struct dma_fence **fences;
+ unsigned int i;
+
+ GEM_BUG_ON(!intel_context_is_parent(eb->context));
+
+ fences = kmalloc_array(eb->num_batches, sizeof(*fences), GFP_KERNEL);
+ if (!fences)
+ return ERR_PTR(-ENOMEM);
+
+ for_each_batch_create_order(eb, i) {
+ fences[i] = &eb->requests[i]->fence;
+ __set_bit(I915_FENCE_FLAG_COMPOSITE,
+ &eb->requests[i]->fence.flags);
+ }
+
+ fence_array = dma_fence_array_create(eb->num_batches,
+ fences,
+ eb->context->parallel.fence_context,
+ eb->context->parallel.seqno++,
+ false);
+ if (!fence_array) {
+ kfree(fences);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ /* Move ownership to the dma_fence_array created above */
+ for_each_batch_create_order(eb, i)
+ dma_fence_get(fences[i]);
+
+ if (out_fence_fd != -1) {
+ out_fence = sync_file_create(&fence_array->base);
+		/* sync_file now owns fence_array, drop creation ref */
+ dma_fence_put(&fence_array->base);
+ if (!out_fence)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ eb->composite_fence = &fence_array->base;
+
+ return out_fence;
+}
+
+static struct sync_file *
+eb_fences_add(struct i915_execbuffer *eb, struct i915_request *rq,
+ struct dma_fence *in_fence, int out_fence_fd)
+{
+ struct sync_file *out_fence = NULL;
+ int err;
+
+ if (unlikely(eb->gem_context->syncobj)) {
+ struct dma_fence *fence;
+
+ fence = drm_syncobj_fence_get(eb->gem_context->syncobj);
+ err = i915_request_await_dma_fence(rq, fence);
+ dma_fence_put(fence);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ if (in_fence) {
+ if (eb->args->flags & I915_EXEC_FENCE_SUBMIT)
+ err = i915_request_await_execution(rq, in_fence);
+ else
+ err = i915_request_await_dma_fence(rq, in_fence);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ if (eb->fences) {
+ err = await_fence_array(eb, rq);
+ if (err)
+ return ERR_PTR(err);
+ }
+
+ if (intel_context_is_parallel(eb->context)) {
+ out_fence = eb_composite_fence_create(eb, out_fence_fd);
+ if (IS_ERR(out_fence))
+ return ERR_PTR(-ENOMEM);
+ } else if (out_fence_fd != -1) {
+ out_fence = sync_file_create(&rq->fence);
+ if (!out_fence)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return out_fence;
+}
+
+static struct intel_context *
+eb_find_context(struct i915_execbuffer *eb, unsigned int context_number)
+{
+ struct intel_context *child;
+
+ if (likely(context_number == 0))
+ return eb->context;
+
+ for_each_child(eb->context, child)
+ if (!--context_number)
+ return child;
+
+ GEM_BUG_ON("Context not found");
+
+ return NULL;
+}
+
+static struct sync_file *
+eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
+ int out_fence_fd)
+{
+ struct sync_file *out_fence = NULL;
+ unsigned int i;
+
+ for_each_batch_create_order(eb, i) {
+ /* Allocate a request for this batch buffer nice and early. */
+ eb->requests[i] = i915_request_create(eb_find_context(eb, i));
+ if (IS_ERR(eb->requests[i])) {
+ out_fence = ERR_CAST(eb->requests[i]);
+ eb->requests[i] = NULL;
+ return out_fence;
+ }
+
+ /*
+ * Only the first request added (committed to backend) has to
+ * take the in fences into account as all subsequent requests
+		 * will have fences inserted in between them.
+ */
+ if (i + 1 == eb->num_batches) {
+ out_fence = eb_fences_add(eb, eb->requests[i],
+ in_fence, out_fence_fd);
+ if (IS_ERR(out_fence))
+ return out_fence;
+ }
+
+ /*
+ * Not really on stack, but we don't want to call
+ * kfree on the batch_snapshot when we put it, so use the
+ * _onstack interface.
+ */
+ if (eb->batches[i]->vma)
+ eb->requests[i]->batch_res =
+ i915_vma_resource_get(eb->batches[i]->vma->resource);
+ if (eb->batch_pool) {
+ GEM_BUG_ON(intel_context_is_parallel(eb->context));
+ intel_gt_buffer_pool_mark_active(eb->batch_pool,
+ eb->requests[i]);
+ }
+ }
+
+ return out_fence;
+}
+
+static int
+i915_gem_do_execbuffer(struct drm_device *dev,
+ struct drm_file *file,
+ struct drm_i915_gem_execbuffer2 *args,
+ struct drm_i915_gem_exec_object2 *exec)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct i915_execbuffer eb;
+ struct dma_fence *in_fence = NULL;
+ struct sync_file *out_fence = NULL;
+ int out_fence_fd = -1;
+ int err;
+
+ BUILD_BUG_ON(__EXEC_INTERNAL_FLAGS & ~__I915_EXEC_ILLEGAL_FLAGS);
+ BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS &
+ ~__EXEC_OBJECT_UNKNOWN_FLAGS);
+
+ eb.i915 = i915;
+ eb.file = file;
+ eb.args = args;
+ if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC))
+ args->flags |= __EXEC_HAS_RELOC;
+
+ eb.exec = exec;
+ eb.vma = (struct eb_vma *)(exec + args->buffer_count + 1);
+ eb.vma[0].vma = NULL;
+ eb.batch_pool = NULL;
+
+ eb.invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
+ reloc_cache_init(&eb.reloc_cache, eb.i915);
+
+ eb.buffer_count = args->buffer_count;
+ eb.batch_start_offset = args->batch_start_offset;
+ eb.trampoline = NULL;
+
+ eb.fences = NULL;
+ eb.num_fences = 0;
+
+ eb_capture_list_clear(&eb);
+
+ memset(eb.requests, 0, sizeof(struct i915_request *) *
+ ARRAY_SIZE(eb.requests));
+ eb.composite_fence = NULL;
+
+ eb.batch_flags = 0;
+ if (args->flags & I915_EXEC_SECURE) {
+ if (GRAPHICS_VER(i915) >= 11)
+ return -ENODEV;
+
+ /* Return -EPERM to trigger fallback code on old binaries. */
+ if (!HAS_SECURE_BATCHES(i915))
+ return -EPERM;
+
+ if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ eb.batch_flags |= I915_DISPATCH_SECURE;
+ }
+ if (args->flags & I915_EXEC_IS_PINNED)
+ eb.batch_flags |= I915_DISPATCH_PINNED;
+
+ err = parse_execbuf2_extensions(args, &eb);
+ if (err)
+ goto err_ext;
+
+ err = add_fence_array(&eb);
+ if (err)
+ goto err_ext;
+
+#define IN_FENCES (I915_EXEC_FENCE_IN | I915_EXEC_FENCE_SUBMIT)
+ if (args->flags & IN_FENCES) {
+ if ((args->flags & IN_FENCES) == IN_FENCES)
+ return -EINVAL;
+
+ in_fence = sync_file_get_fence(lower_32_bits(args->rsvd2));
+ if (!in_fence) {
+ err = -EINVAL;
+ goto err_ext;
+ }
+ }
+#undef IN_FENCES
+
+ if (args->flags & I915_EXEC_FENCE_OUT) {
+ out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+ if (out_fence_fd < 0) {
+ err = out_fence_fd;
+ goto err_in_fence;
+ }
+ }
+
+ err = eb_create(&eb);
+ if (err)
+ goto err_out_fence;
+
+ GEM_BUG_ON(!eb.lut_size);
+
+ err = eb_select_context(&eb);
+ if (unlikely(err))
+ goto err_destroy;
+
+ err = eb_select_engine(&eb);
+ if (unlikely(err))
+ goto err_context;
+
+ err = eb_lookup_vmas(&eb);
+ if (err) {
+ eb_release_vmas(&eb, true);
+ goto err_engine;
+ }
+
+ i915_gem_ww_ctx_init(&eb.ww, true);
+
+ err = eb_relocate_parse(&eb);
+ if (err) {
+ /*
+ * If the user expects the execobject.offset and
+ * reloc.presumed_offset to be an exact match,
+ * as for using NO_RELOC, then we cannot update
+ * the execobject.offset until we have completed
+ * relocation.
+ */
+ args->flags &= ~__EXEC_HAS_RELOC;
+ goto err_vma;
+ }
+
+ ww_acquire_done(&eb.ww.ctx);
+ err = eb_capture_stage(&eb);
+ if (err)
+ goto err_vma;
+
+ out_fence = eb_requests_create(&eb, in_fence, out_fence_fd);
+ if (IS_ERR(out_fence)) {
+ err = PTR_ERR(out_fence);
+ out_fence = NULL;
+ if (eb.requests[0])
+ goto err_request;
+ else
+ goto err_vma;
+ }
+
+ err = eb_submit(&eb);
+
+err_request:
+ eb_requests_get(&eb);
+ err = eb_requests_add(&eb, err);
+
+ if (eb.fences)
+ signal_fence_array(&eb, eb.composite_fence ?
+ eb.composite_fence :
+ &eb.requests[0]->fence);
+
+ if (unlikely(eb.gem_context->syncobj)) {
+ drm_syncobj_replace_fence(eb.gem_context->syncobj,
+ eb.composite_fence ?
+ eb.composite_fence :
+ &eb.requests[0]->fence);
+ }
+
+ if (out_fence) {
+ if (err == 0) {
+ fd_install(out_fence_fd, out_fence->file);
+ args->rsvd2 &= GENMASK_ULL(31, 0); /* keep in-fence */
+ args->rsvd2 |= (u64)out_fence_fd << 32;
+ out_fence_fd = -1;
+ } else {
+ fput(out_fence->file);
+ }
+ }
+
+ if (!out_fence && eb.composite_fence)
+ dma_fence_put(eb.composite_fence);
+
+ eb_requests_put(&eb);
+
+err_vma:
+ eb_release_vmas(&eb, true);
+ WARN_ON(err == -EDEADLK);
+ i915_gem_ww_ctx_fini(&eb.ww);
+
+ if (eb.batch_pool)
+ intel_gt_buffer_pool_put(eb.batch_pool);
+err_engine:
+ eb_put_engine(&eb);
+err_context:
+ i915_gem_context_put(eb.gem_context);
+err_destroy:
+ eb_destroy(&eb);
+err_out_fence:
+ if (out_fence_fd != -1)
+ put_unused_fd(out_fence_fd);
+err_in_fence:
+ dma_fence_put(in_fence);
+err_ext:
+ put_fence_array(eb.fences, eb.num_fences);
+ return err;
+}
+
+static size_t eb_element_size(void)
+{
+ return sizeof(struct drm_i915_gem_exec_object2) + sizeof(struct eb_vma);
+}
+
+static bool check_buffer_count(size_t count)
+{
+ const size_t sz = eb_element_size();
+
+ /*
+ * When using LUT_HANDLE, we impose a limit of INT_MAX for the lookup
+ * array size (see eb_create()). Otherwise, we can accept an array as
+ * large as can be addressed (though use large arrays at your peril)!
+ */
+
+ return !(count < 1 || count > INT_MAX || count > SIZE_MAX / sz - 1);
+}
+
+int
+i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_execbuffer2 *args = data;
+ struct drm_i915_gem_exec_object2 *exec2_list;
+ const size_t count = args->buffer_count;
+ int err;
+
+ if (!check_buffer_count(count)) {
+ drm_dbg(&i915->drm, "execbuf2 with %zd buffers\n", count);
+ return -EINVAL;
+ }
+
+ err = i915_gem_check_execbuffer(i915, args);
+ if (err)
+ return err;
+
+ /* Allocate extra slots for use by the command parser */
+ exec2_list = kvmalloc_array(count + 2, eb_element_size(),
+ __GFP_NOWARN | GFP_KERNEL);
+ if (exec2_list == NULL) {
+ drm_dbg(&i915->drm, "Failed to allocate exec list for %zd buffers\n",
+ count);
+ return -ENOMEM;
+ }
+ if (copy_from_user(exec2_list,
+ u64_to_user_ptr(args->buffers_ptr),
+ sizeof(*exec2_list) * count)) {
+ drm_dbg(&i915->drm, "copy %zd exec entries failed\n", count);
+ kvfree(exec2_list);
+ return -EFAULT;
+ }
+
+ err = i915_gem_do_execbuffer(dev, file, args, exec2_list);
+
+ /*
+ * Now that we have begun execution of the batchbuffer, we ignore
+ * any new error after this point. Also given that we have already
+ * updated the associated relocations, we try to write out the current
+ * object locations irrespective of any error.
+ */
+ if (args->flags & __EXEC_HAS_RELOC) {
+ struct drm_i915_gem_exec_object2 __user *user_exec_list =
+ u64_to_user_ptr(args->buffers_ptr);
+ unsigned int i;
+
+ /* Copy the new buffer offsets back to the user's exec list. */
+ /*
+ * Note: count * sizeof(*user_exec_list) does not overflow,
+ * because we checked 'count' in check_buffer_count().
+ *
+ * And this range already got effectively checked earlier
+ * when we did the "copy_from_user()" above.
+ */
+ if (!user_write_access_begin(user_exec_list,
+ count * sizeof(*user_exec_list)))
+ goto end;
+
+ for (i = 0; i < args->buffer_count; i++) {
+ if (!(exec2_list[i].offset & UPDATE))
+ continue;
+
+ exec2_list[i].offset =
+ gen8_canonical_addr(exec2_list[i].offset & PIN_OFFSET_MASK);
+ unsafe_put_user(exec2_list[i].offset,
+ &user_exec_list[i].offset,
+ end_user);
+ }
+end_user:
+ user_write_access_end();
+end:;
+ }
+
+ args->flags &= ~__I915_EXEC_UNKNOWN_FLAGS;
+ kvfree(exec2_list);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
new file mode 100644
index 0000000000..6bc26b4b06
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -0,0 +1,199 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+
+#include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_internal.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+#include "i915_utils.h"
+
+#define QUIET (__GFP_NORETRY | __GFP_NOWARN)
+#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN)
+
+static void internal_free_pages(struct sg_table *st)
+{
+ struct scatterlist *sg;
+
+ for (sg = st->sgl; sg; sg = __sg_next(sg)) {
+ if (sg_page(sg))
+ __free_pages(sg_page(sg), get_order(sg->length));
+ }
+
+ sg_free_table(st);
+ kfree(st);
+}
+
+static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int npages; /* restricted by sg_alloc_table */
+ int max_order = MAX_ORDER;
+ unsigned int max_segment;
+ gfp_t gfp;
+
+ if (overflows_type(obj->base.size >> PAGE_SHIFT, npages))
+ return -E2BIG;
+
+ npages = obj->base.size >> PAGE_SHIFT;
+ max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+ max_order = min(max_order, get_order(max_segment));
+
+ gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
+ if (IS_I965GM(i915) || IS_I965G(i915)) {
+ /* 965gm cannot relocate objects above 4GiB. */
+ gfp &= ~__GFP_HIGHMEM;
+ gfp |= __GFP_DMA32;
+ }
+
+create_st:
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, npages, GFP_KERNEL)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ sg = st->sgl;
+ st->nents = 0;
+
+ do {
+ int order = min(fls(npages) - 1, max_order);
+ struct page *page;
+
+ do {
+ page = alloc_pages(gfp | (order ? QUIET : MAYFAIL),
+ order);
+ if (page)
+ break;
+ if (!order--)
+ goto err;
+
+ /* Limit subsequent allocations as well */
+ max_order = order;
+ } while (1);
+
+ sg_set_page(sg, page, PAGE_SIZE << order, 0);
+ st->nents++;
+
+ npages -= 1 << order;
+ if (!npages) {
+ sg_mark_end(sg);
+ break;
+ }
+
+ sg = __sg_next(sg);
+ } while (1);
+
+ if (i915_gem_gtt_prepare_pages(obj, st)) {
+		/* Failed to dma-map; try again with single-page sg segments */
+ if (get_order(st->sgl->length)) {
+ internal_free_pages(st);
+ max_order = 0;
+ goto create_st;
+ }
+ goto err;
+ }
+
+ __i915_gem_object_set_pages(obj, st);
+
+ return 0;
+
+err:
+ sg_set_page(sg, NULL, 0, 0);
+ sg_mark_end(sg);
+ internal_free_pages(st);
+
+ return -ENOMEM;
+}
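+
+/*
+ * Worked example of the order-descent above (assuming 4KiB pages and no
+ * allocation failures): a 1MiB object has npages = 256, so fls(npages) - 1
+ * is 8 and the first attempt is min(8, max_order), typically a single
+ * order-8 (1MiB) chunk. Each failure steps the order down and lowers
+ * max_order so later iterations never retry the larger sizes; as a last
+ * resort the table is built entirely from order-0 pages.
+ */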
+
+static void i915_gem_object_put_pages_internal(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ i915_gem_gtt_finish_pages(obj, pages);
+ internal_free_pages(pages);
+
+ obj->mm.dirty = false;
+
+ __start_cpu_write(obj);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_internal_ops = {
+ .name = "i915_gem_object_internal",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+ .get_pages = i915_gem_object_get_pages_internal,
+ .put_pages = i915_gem_object_put_pages_internal,
+};
+
+struct drm_i915_gem_object *
+__i915_gem_object_create_internal(struct drm_i915_private *i915,
+ const struct drm_i915_gem_object_ops *ops,
+ phys_addr_t size)
+{
+ static struct lock_class_key lock_class;
+ struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+ if (overflows_type(size, obj->base.size))
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+ i915_gem_object_init(obj, ops, &lock_class, 0);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+
+ /*
+ * Mark the object as volatile, such that the pages are marked as
+ * dontneed whilst they are still pinned. As soon as they are unpinned
+ * they are allowed to be reaped by the shrinker, and the caller is
+ * expected to repopulate - the contents of this object are only valid
+ * whilst active and pinned.
+ */
+ i915_gem_object_set_volatile(obj);
+
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+
+ return obj;
+}
+
+/**
+ * i915_gem_object_create_internal: create an object with volatile pages
+ * @i915: the i915 device
+ * @size: the size in bytes of backing storage to allocate for the object
+ *
+ * Creates a new object that wraps some internal memory for private use.
+ * This object is not backed by swappable storage, and as such its contents
+ * are volatile and only valid whilst pinned. If the object is reaped by the
+ * shrinker, its pages and data will be discarded. Equally, it is not a full
+ * GEM object and so not valid for access from userspace. This makes it useful
+ * for hardware interfaces like ringbuffers (which are pinned from the time
+ * the request is written to the time the hardware stops accessing it), but
+ * not for contexts (which need to be preserved when not active for later
+ * reuse). Note that it is not cleared upon allocation.
+ */
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *i915,
+ phys_addr_t size)
+{
+ return __i915_gem_object_create_internal(i915, &i915_gem_object_internal_ops, size);
+}
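+
+/*
+ * Illustrative in-kernel usage (a hedged sketch, not part of this file):
+ * a ringbuffer-style user would allocate, pin and map such an object
+ * roughly as follows, repopulating the contents after any unpin:
+ *
+ *	struct drm_i915_gem_object *obj;
+ *	void *vaddr;
+ *
+ *	obj = i915_gem_object_create_internal(i915, SZ_64K);
+ *	if (IS_ERR(obj))
+ *		return PTR_ERR(obj);
+ *
+ *	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ *	if (IS_ERR(vaddr)) {
+ *		i915_gem_object_put(obj);
+ *		return PTR_ERR(vaddr);
+ *	}
+ */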
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.h b/drivers/gpu/drm/i915/gem/i915_gem_internal.h
new file mode 100644
index 0000000000..6664e06112
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_GEM_INTERNAL_H__
+#define __I915_GEM_INTERNAL_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct drm_i915_gem_object_ops;
+struct drm_i915_private;
+
+struct drm_i915_gem_object *
+i915_gem_object_create_internal(struct drm_i915_private *i915,
+ phys_addr_t size);
+struct drm_i915_gem_object *
+__i915_gem_object_create_internal(struct drm_i915_private *i915,
+ const struct drm_i915_gem_object_ops *ops,
+ phys_addr_t size);
+
+#endif /* __I915_GEM_INTERNAL_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
new file mode 100644
index 0000000000..28d6526e32
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ioctls.h
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef I915_GEM_IOCTLS_H
+#define I915_GEM_IOCTLS_H
+
+struct drm_device;
+struct drm_file;
+
+int i915_gem_busy_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_create_ext_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_pread_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_userptr_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+int i915_gem_wait_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
new file mode 100644
index 0000000000..3198b64ad7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <uapi/drm/i915_drm.h>
+
+#include "intel_memory_region.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_lmem.h"
+#include "i915_drv.h"
+
+void __iomem *
+i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
+ unsigned long n,
+ unsigned long size)
+{
+ resource_size_t offset;
+
+ GEM_BUG_ON(!i915_gem_object_is_contiguous(obj));
+
+ offset = i915_gem_object_get_dma_address(obj, n);
+ offset -= obj->mm.region->region.start;
+
+ return io_mapping_map_wc(&obj->mm.region->iomap, offset, size);
+}
+
+/**
+ * i915_gem_object_is_lmem - Whether the object is resident in
+ * lmem
+ * @obj: The object to check.
+ *
+ * Even if an object is allowed to migrate and change memory region,
+ * this function checks whether it will always be present in lmem when
+ * valid *or* if that's not the case, whether it's currently resident in lmem.
+ * For migratable and evictable objects, the latter only makes sense when
+ * the object is locked.
+ *
+ * Return: Whether the object is migratable but currently resident in lmem,
+ * or not migratable and will be present in lmem when valid.
+ */
+bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+
+#ifdef CONFIG_LOCKDEP
+ if (i915_gem_object_migratable(obj) &&
+ i915_gem_object_evictable(obj))
+ assert_object_held(obj);
+#endif
+ return mr && (mr->type == INTEL_MEMORY_LOCAL ||
+ mr->type == INTEL_MEMORY_STOLEN_LOCAL);
+}
+
+/**
+ * __i915_gem_object_is_lmem - Whether the object is resident in
+ * lmem while in the fence signaling critical path.
+ * @obj: The object to check.
+ *
+ * This function is intended to be called from within the fence signaling
+ * path where the fence, or a pin, keeps the object from being migrated. For
+ * example during gpu reset or similar.
+ *
+ * Return: Whether the object is resident in lmem.
+ */
+bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+
+#ifdef CONFIG_LOCKDEP
+ GEM_WARN_ON(dma_resv_test_signaled(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP) &&
+ i915_gem_object_evictable(obj));
+#endif
+ return mr && (mr->type == INTEL_MEMORY_LOCAL ||
+ mr->type == INTEL_MEMORY_STOLEN_LOCAL);
+}
+
+/**
+ * __i915_gem_object_create_lmem_with_ps - Create lmem object and force the
+ * minimum page size for the backing pages.
+ * @i915: The i915 instance.
+ * @size: The size in bytes for the object. Note that we need to round the size
+ * up depending on the @page_size. The final object size can be fished out from
+ * the drm GEM object.
+ * @page_size: The requested minimum page size in bytes for this object. This is
+ * useful if we need something bigger than the region's min_page_size due to some
+ * hw restriction, or in some very specialised cases where it needs to be
+ * smaller, where the internal fragmentation cost is too great when rounding up
+ * the object size.
+ * @flags: The optional BO allocation flags.
+ *
+ * Note that this interface assumes you know what you are doing when forcing the
+ * @page_size. If this is smaller than the region's min_page_size then it can
+ * never be inserted into any GTT, otherwise it might lead to undefined
+ * behaviour.
+ *
+ * Return: The object pointer, which might be an ERR_PTR in the case of failure.
+ */
+struct drm_i915_gem_object *
+__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
+{
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM_0],
+ size, page_size, flags);
+}
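+
+/*
+ * Illustrative usage (a hedged sketch, not part of this file): a caller
+ * with a hardware requirement for 64K pages might round the size up itself
+ * and force the backing page size:
+ *
+ *	obj = __i915_gem_object_create_lmem_with_ps(i915,
+ *						    round_up(size, SZ_64K),
+ *						    SZ_64K, 0);
+ *
+ * As noted above, a @page_size below the region's min_page_size means the
+ * object can never be inserted into any GTT.
+ */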
+
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
+ const void *data, size_t size)
+{
+ struct drm_i915_gem_object *obj;
+ void *map;
+
+ obj = i915_gem_object_create_lmem(i915,
+ round_up(size, PAGE_SIZE),
+ I915_BO_ALLOC_CONTIGUOUS);
+ if (IS_ERR(obj))
+ return obj;
+
+ map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
+ if (IS_ERR(map)) {
+ i915_gem_object_put(obj);
+ return map;
+ }
+
+ memcpy(map, data, size);
+
+ i915_gem_object_flush_map(obj);
+ __i915_gem_object_release_map(obj);
+
+ return obj;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem(struct drm_i915_private *i915,
+ resource_size_t size,
+ unsigned int flags)
+{
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_LMEM_0],
+ size, 0, flags);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
new file mode 100644
index 0000000000..5a7a14e85c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_LMEM_H
+#define __I915_GEM_LMEM_H
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_i915_gem_object;
+struct intel_memory_region;
+
+void __iomem *
+i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
+ unsigned long n,
+ unsigned long size);
+
+bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
+
+bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
+ const void *data, size_t size);
+
+struct drm_i915_gem_object *
+__i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags);
+struct drm_i915_gem_object *
+i915_gem_object_create_lmem(struct drm_i915_private *i915,
+ resource_size_t size,
+ unsigned int flags);
+
+#endif /* !__I915_GEM_LMEM_H */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
new file mode 100644
index 0000000000..310654542b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -0,0 +1,1105 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/anon_inodes.h>
+#include <linux/mman.h>
+#include <linux/pfn_t.h>
+#include <linux/sizes.h>
+
+#include <drm/drm_cache.h>
+
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
+
+#include "i915_drv.h"
+#include "i915_gem_evict.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_gem_mman.h"
+#include "i915_mm.h"
+#include "i915_trace.h"
+#include "i915_user_extensions.h"
+#include "i915_gem_ttm.h"
+#include "i915_vma.h"
+
+static inline bool
+__vma_matches(struct vm_area_struct *vma, struct file *filp,
+ unsigned long addr, unsigned long size)
+{
+ if (vma->vm_file != filp)
+ return false;
+
+ return vma->vm_start == addr &&
+ (vma->vm_end - vma->vm_start) == PAGE_ALIGN(size);
+}
+
+/**
+ * i915_gem_mmap_ioctl - Maps the contents of an object, returning the address
+ * it is mapped to.
+ * @dev: drm device
+ * @data: ioctl data blob
+ * @file: drm file
+ *
+ * While the mapping holds a reference on the contents of the object, it doesn't
+ * imply a ref on the object itself.
+ *
+ * IMPORTANT:
+ *
+ * DRM driver writers who look at this function as an example for how to do GEM
+ * mmap support, please don't implement mmap support like here. The modern way
+ * to implement DRM mmap support is with an mmap offset ioctl (like
+ * i915_gem_mmap_gtt) and then using the mmap syscall on the DRM fd directly.
+ * That way debug tooling like valgrind will understand what's going on, hiding
+ * the mmap call in a driver private ioctl will break that. The i915 driver only
+ * does cpu mmaps this way because we didn't know better.
+ */
+int
+i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_mmap *args = data;
+ struct drm_i915_gem_object *obj;
+ unsigned long addr;
+
+ /*
+ * mmap ioctl is disallowed for all discrete platforms,
+ * and for all platforms with GRAPHICS_VER > 12.
+ */
+ if (IS_DGFX(i915) || GRAPHICS_VER_FULL(i915) > IP_VER(12, 0))
+ return -EOPNOTSUPP;
+
+ if (args->flags & ~(I915_MMAP_WC))
+ return -EINVAL;
+
+ if (args->flags & I915_MMAP_WC && !pat_enabled())
+ return -ENODEV;
+
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ /* prime objects have no backing filp to GEM mmap
+ * pages from.
+ */
+ if (!obj->base.filp) {
+ addr = -ENXIO;
+ goto err;
+ }
+
+ if (range_overflows(args->offset, args->size, (u64)obj->base.size)) {
+ addr = -EINVAL;
+ goto err;
+ }
+
+ addr = vm_mmap(obj->base.filp, 0, args->size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ args->offset);
+ if (IS_ERR_VALUE(addr))
+ goto err;
+
+ if (args->flags & I915_MMAP_WC) {
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+
+ if (mmap_write_lock_killable(mm)) {
+ addr = -EINTR;
+ goto err;
+ }
+ vma = find_vma(mm, addr);
+ if (vma && __vma_matches(vma, obj->base.filp, addr, args->size))
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ else
+ addr = -ENOMEM;
+ mmap_write_unlock(mm);
+ if (IS_ERR_VALUE(addr))
+ goto err;
+ }
+ i915_gem_object_put(obj);
+
+ args->addr_ptr = (u64)addr;
+ return 0;
+
+err:
+ i915_gem_object_put(obj);
+ return addr;
+}
+
+static unsigned int tile_row_pages(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_get_tile_row_size(obj) >> PAGE_SHIFT;
+}
+
+/**
+ * i915_gem_mmap_gtt_version - report the current feature set for GTT mmaps
+ *
+ * A history of the GTT mmap interface:
+ *
+ * 0 - Everything had to fit into the GTT. Both parties of a memcpy had to be
+ *     aligned and suitable for fencing, and still fit into the available
+ * mappable space left by the pinned display objects. A classic problem
+ * we called the page-fault-of-doom where we would ping-pong between
+ * two objects that could not fit inside the GTT and so the memcpy
+ * would page one object in at the expense of the other between every
+ * single byte.
+ *
+ * 1 - Objects can be any size, and have any compatible fencing (X, Y, or none
+ * as set via i915_gem_set_tiling() [DRM_I915_GEM_SET_TILING]). If the
+ * object is too large for the available space (or simply too large
+ * for the mappable aperture!), a view is created instead and faulted
+ * into userspace. (This view is aligned and sized appropriately for
+ * fenced access.)
+ *
+ * 2 - Recognise WC as a separate cache domain so that we can flush the
+ * delayed writes via GTT before performing direct access via WC.
+ *
+ * 3 - Remove implicit set-domain(GTT) and synchronisation on initial
+ * pagefault; swapin remains transparent.
+ *
+ * 4 - Support multiple fault handlers per object depending on object's
+ * backing storage (a.k.a. MMAP_OFFSET).
+ *
+ * Restrictions:
+ *
+ * * snoopable objects cannot be accessed via the GTT. It can cause machine
+ * hangs on some architectures, corruption on others. An attempt to service
+ * a GTT page fault from a snoopable object will generate a SIGBUS.
+ *
+ * * the object must be able to fit into RAM (physical memory, though not
+ * limited to the mappable aperture).
+ *
+ *
+ * Caveats:
+ *
+ * * a new GTT page fault will synchronize rendering from the GPU and flush
+ * all data to system memory. Subsequent access will not be synchronized.
+ *
+ * * all mappings are revoked on runtime device suspend.
+ *
+ * * there are only 8, 16 or 32 fence registers to share between all users
+ * (older machines require fence register for display and blitter access
+ * as well). Contention of the fence registers will cause the previous users
+ * to be unmapped and any new access will generate new page faults.
+ *
+ * * running out of memory while servicing a fault may generate a SIGBUS,
+ * rather than the expected SIGSEGV.
+ */
+int i915_gem_mmap_gtt_version(void)
+{
+ return 4;
+}
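+
+/*
+ * Illustrative userspace query (a hedged sketch, not part of this file):
+ * the version above is reported through GETPARAM:
+ *
+ *	int value = 0;
+ *	struct drm_i915_getparam gp = {
+ *		.param = I915_PARAM_MMAP_GTT_VERSION,
+ *		.value = &value,
+ *	};
+ *
+ *	if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && value >= 4)
+ *		use_mmap_offset = true;
+ *
+ * use_mmap_offset stands in for application state; version 4 corresponds
+ * to the MMAP_OFFSET support described above.
+ */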
+
+static inline struct i915_gtt_view
+compute_partial_view(const struct drm_i915_gem_object *obj,
+ pgoff_t page_offset,
+ unsigned int chunk)
+{
+ struct i915_gtt_view view;
+
+ if (i915_gem_object_is_tiled(obj))
+ chunk = roundup(chunk, tile_row_pages(obj) ?: 1);
+
+ view.type = I915_GTT_VIEW_PARTIAL;
+ view.partial.offset = rounddown(page_offset, chunk);
+ view.partial.size =
+ min_t(unsigned int, chunk,
+ (obj->base.size >> PAGE_SHIFT) - view.partial.offset);
+
+ /* If the partial covers the entire object, just create a normal VMA. */
+ if (chunk >= obj->base.size >> PAGE_SHIFT)
+ view.type = I915_GTT_VIEW_NORMAL;
+
+ return view;
+}
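+
+/*
+ * Worked example (assuming 4KiB pages and an untiled object): for a 16MiB
+ * object (4096 pages) faulting at page_offset 1000 with chunk ==
+ * MIN_CHUNK_PAGES (256 pages, as passed by vm_fault_gtt() below), the
+ * partial view spans pages 768-1023, i.e. the 1MiB-aligned window
+ * containing the faulting page. Only when the chunk covers the whole
+ * object does this collapse to a normal view.
+ */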
+
+static vm_fault_t i915_error_to_vmf_fault(int err)
+{
+ switch (err) {
+ default:
+ WARN_ONCE(err, "unhandled error in %s: %i\n", __func__, err);
+ fallthrough;
+ case -EIO: /* shmemfs failure from swap device */
+ case -EFAULT: /* purged object */
+ case -ENODEV: /* bad object, how did you get here! */
+ case -ENXIO: /* unable to access backing store (on device) */
+ return VM_FAULT_SIGBUS;
+
+ case -ENOMEM: /* our allocation failure */
+ return VM_FAULT_OOM;
+
+ case 0:
+ case -EAGAIN:
+ case -ENOSPC: /* transient failure to evict? */
+ case -ENOBUFS: /* temporarily out of fences? */
+ case -ERESTARTSYS:
+ case -EINTR:
+ case -EBUSY:
+ /*
+ * EBUSY is ok: this just means that another thread
+ * already did the job.
+ */
+ return VM_FAULT_NOPAGE;
+ }
+}
+
+static vm_fault_t vm_fault_cpu(struct vm_fault *vmf)
+{
+ struct vm_area_struct *area = vmf->vma;
+ struct i915_mmap_offset *mmo = area->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+ resource_size_t iomap;
+ int err;
+
+ /* Sanity check that we allow writing into this object */
+ if (unlikely(i915_gem_object_is_readonly(obj) &&
+ area->vm_flags & VM_WRITE))
+ return VM_FAULT_SIGBUS;
+
+ if (i915_gem_object_lock_interruptible(obj, NULL))
+ return VM_FAULT_NOPAGE;
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ goto out;
+
+ iomap = -1;
+ if (!i915_gem_object_has_struct_page(obj)) {
+ iomap = obj->mm.region->iomap.base;
+ iomap -= obj->mm.region->region.start;
+ }
+
+ /* PTEs are revoked in obj->ops->put_pages() */
+ err = remap_io_sg(area,
+ area->vm_start, area->vm_end - area->vm_start,
+ obj->mm.pages->sgl, iomap);
+
+ if (area->vm_flags & VM_WRITE) {
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ obj->mm.dirty = true;
+ }
+
+ i915_gem_object_unpin_pages(obj);
+
+out:
+ i915_gem_object_unlock(obj);
+ return i915_error_to_vmf_fault(err);
+}
+
+static vm_fault_t vm_fault_gtt(struct vm_fault *vmf)
+{
+#define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT)
+ struct vm_area_struct *area = vmf->vma;
+ struct i915_mmap_offset *mmo = area->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+ struct drm_device *dev = obj->base.dev;
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct intel_runtime_pm *rpm = &i915->runtime_pm;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+ bool write = area->vm_flags & VM_WRITE;
+ struct i915_gem_ww_ctx ww;
+ intel_wakeref_t wakeref;
+ struct i915_vma *vma;
+ pgoff_t page_offset;
+ int srcu;
+ int ret;
+
+ /* We don't use vmf->pgoff since that has the fake offset */
+ page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT;
+
+ trace_i915_gem_object_fault(obj, page_offset, true, write);
+
+ wakeref = intel_runtime_pm_get(rpm);
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ ret = i915_gem_object_lock(obj, &ww);
+ if (ret)
+ goto err_rpm;
+
+ /* Sanity check that we allow writing into this object */
+ if (i915_gem_object_is_readonly(obj) && write) {
+ ret = -EFAULT;
+ goto err_rpm;
+ }
+
+ ret = i915_gem_object_pin_pages(obj);
+ if (ret)
+ goto err_rpm;
+
+ ret = intel_gt_reset_lock_interruptible(ggtt->vm.gt, &srcu);
+ if (ret)
+ goto err_pages;
+
+ /* Now pin it into the GTT as needed */
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
+ /* Use a partial view if it is bigger than available space */
+ struct i915_gtt_view view =
+ compute_partial_view(obj, page_offset, MIN_CHUNK_PAGES);
+ unsigned int flags;
+
+ flags = PIN_MAPPABLE | PIN_NOSEARCH;
+ if (view.type == I915_GTT_VIEW_NORMAL)
+ flags |= PIN_NONBLOCK; /* avoid warnings for pinned */
+
+ /*
+ * Userspace is now writing through an untracked VMA, abandon
+ * all hope that the hardware is able to track future writes.
+ */
+
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+ if (IS_ERR(vma) && vma != ERR_PTR(-EDEADLK)) {
+ flags = PIN_MAPPABLE;
+ view.type = I915_GTT_VIEW_PARTIAL;
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+ }
+
+ /*
+ * The entire mappable GGTT is pinned? Unexpected!
+ * Try to evict the object we locked too, as normally we skip it
+ * due to lack of short term pinning inside execbuf.
+ */
+ if (vma == ERR_PTR(-ENOSPC)) {
+ ret = mutex_lock_interruptible(&ggtt->vm.mutex);
+ if (!ret) {
+ ret = i915_gem_evict_vm(&ggtt->vm, &ww, NULL);
+ mutex_unlock(&ggtt->vm.mutex);
+ }
+ if (ret)
+ goto err_reset;
+ vma = i915_gem_object_ggtt_pin_ww(obj, &ww, &view, 0, 0, flags);
+ }
+ }
+ if (IS_ERR(vma)) {
+ ret = PTR_ERR(vma);
+ goto err_reset;
+ }
+
+ /* Access to snoopable pages through the GTT is incoherent. */
+ /*
+ * For objects created by userspace through GEM_CREATE with pat_index
+ * set by set_pat extension, coherency is managed by userspace, make
+ * sure we don't fail handling the vm fault by calling
+ * i915_gem_object_has_cache_level() which always return true for such
+ * objects. Otherwise this helper function would fall back to checking
+ * whether the object is un-cached.
+ */
+ if (!(i915_gem_object_has_cache_level(obj, I915_CACHE_NONE) ||
+ HAS_LLC(i915))) {
+ ret = -EFAULT;
+ goto err_unpin;
+ }
+
+ ret = i915_vma_pin_fence(vma);
+ if (ret)
+ goto err_unpin;
+
+ /* Finally, remap it using the new GTT offset */
+ ret = remap_io_mapping(area,
+ area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT),
+ (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT,
+ min_t(u64, vma->size, area->vm_end - area->vm_start),
+ &ggtt->iomap);
+ if (ret)
+ goto err_fence;
+
+ assert_rpm_wakelock_held(rpm);
+
+ /* Mark as being mmapped into userspace for later revocation */
+ mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
+ if (!i915_vma_set_userfault(vma) && !obj->userfault_count++)
+ list_add(&obj->userfault_link, &to_gt(i915)->ggtt->userfault_list);
+ mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
+
+ /* Track the mmo associated with the fenced vma */
+ vma->mmo = mmo;
+
+ if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
+ intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref,
+ msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
+
+ if (write) {
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ i915_vma_set_ggtt_write(vma);
+ obj->mm.dirty = true;
+ }
+
+err_fence:
+ i915_vma_unpin_fence(vma);
+err_unpin:
+ __i915_vma_unpin(vma);
+err_reset:
+ intel_gt_reset_unlock(ggtt->vm.gt, srcu);
+err_pages:
+ i915_gem_object_unpin_pages(obj);
+err_rpm:
+ if (ret == -EDEADLK) {
+ ret = i915_gem_ww_ctx_backoff(&ww);
+ if (!ret)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ intel_runtime_pm_put(rpm, wakeref);
+ return i915_error_to_vmf_fault(ret);
+}
+
+static int
+vm_access(struct vm_area_struct *area, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct i915_mmap_offset *mmo = area->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+ struct i915_gem_ww_ctx ww;
+ void *vaddr;
+ int err = 0;
+
+ if (i915_gem_object_is_readonly(obj) && write)
+ return -EACCES;
+
+ addr -= area->vm_start;
+ if (range_overflows_t(u64, addr, len, obj->base.size))
+ return -EINVAL;
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ goto out;
+
+ /* As this is primarily for debugging, let's focus on simplicity */
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_FORCE_WC);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out;
+ }
+
+ if (write) {
+ memcpy(vaddr + addr, buf, len);
+ __i915_gem_object_flush_map(obj, addr, len);
+ } else {
+ memcpy(buf, vaddr + addr, len);
+ }
+
+ i915_gem_object_unpin_map(obj);
+out:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+
+ if (err)
+ return err;
+
+ return len;
+}
+
+void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+
+ GEM_BUG_ON(!obj->userfault_count);
+
+ for_each_ggtt_vma(vma, obj)
+ i915_vma_revoke_mmap(vma);
+
+ GEM_BUG_ON(obj->userfault_count);
+}
+
+/*
+ * It is vital that we remove the page mapping if we have mapped a tiled
+ * object through the GTT and then lose the fence register due to
+ * resource pressure. Similarly if the object has been moved out of the
+ * aperture, then pages mapped into userspace must be revoked. Removing the
+ * mapping will then trigger a page fault on the next user access, allowing
+ * fixup by vm_fault_gtt().
+ */
+void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ intel_wakeref_t wakeref;
+
+ /*
+ * Serialisation between user GTT access and our code depends upon
+ * revoking the CPU's PTE whilst the mutex is held. The next user
+ * pagefault then has to wait until we release the mutex.
+ *
+ * Note that RPM complicates somewhat by adding an additional
+ * requirement that operations to the GGTT be made holding the RPM
+ * wakeref.
+ */
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+ mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
+
+ if (!obj->userfault_count)
+ goto out;
+
+ __i915_gem_object_release_mmap_gtt(obj);
+
+ /*
+	 * Ensure that the CPU's PTEs are revoked and there are no outstanding
+	 * memory transactions from userspace before we return. The TLB
+	 * flushing implied by changing the PTEs above *should* be
+ * sufficient, an extra barrier here just provides us with a bit
+ * of paranoid documentation about our requirement to serialise
+ * memory writes before touching registers / GSM.
+ */
+ wmb();
+
+out:
+ mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+}
+
+void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct ttm_device *bdev = bo->bdev;
+
+ drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
+
+ /*
+ * We have exclusive access here via runtime suspend. All other callers
+ * must first grab the rpm wakeref.
+ */
+ GEM_BUG_ON(!obj->userfault_count);
+ list_del(&obj->userfault_link);
+ obj->userfault_count = 0;
+}
+
+void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
+{
+ struct i915_mmap_offset *mmo, *mn;
+
+ if (obj->ops->unmap_virtual)
+ obj->ops->unmap_virtual(obj);
+
+ spin_lock(&obj->mmo.lock);
+ rbtree_postorder_for_each_entry_safe(mmo, mn,
+ &obj->mmo.offsets, offset) {
+ /*
+ * vma_node_unmap for GTT mmaps handled already in
+ * __i915_gem_object_release_mmap_gtt
+ */
+ if (mmo->mmap_type == I915_MMAP_TYPE_GTT)
+ continue;
+
+ spin_unlock(&obj->mmo.lock);
+ drm_vma_node_unmap(&mmo->vma_node,
+ obj->base.dev->anon_inode->i_mapping);
+ spin_lock(&obj->mmo.lock);
+ }
+ spin_unlock(&obj->mmo.lock);
+}
+
+static struct i915_mmap_offset *
+lookup_mmo(struct drm_i915_gem_object *obj,
+ enum i915_mmap_type mmap_type)
+{
+ struct rb_node *rb;
+
+ spin_lock(&obj->mmo.lock);
+ rb = obj->mmo.offsets.rb_node;
+ while (rb) {
+ struct i915_mmap_offset *mmo =
+ rb_entry(rb, typeof(*mmo), offset);
+
+ if (mmo->mmap_type == mmap_type) {
+ spin_unlock(&obj->mmo.lock);
+ return mmo;
+ }
+
+ if (mmo->mmap_type < mmap_type)
+ rb = rb->rb_right;
+ else
+ rb = rb->rb_left;
+ }
+ spin_unlock(&obj->mmo.lock);
+
+ return NULL;
+}
+
+static struct i915_mmap_offset *
+insert_mmo(struct drm_i915_gem_object *obj, struct i915_mmap_offset *mmo)
+{
+ struct rb_node *rb, **p;
+
+ spin_lock(&obj->mmo.lock);
+ rb = NULL;
+ p = &obj->mmo.offsets.rb_node;
+ while (*p) {
+ struct i915_mmap_offset *pos;
+
+ rb = *p;
+ pos = rb_entry(rb, typeof(*pos), offset);
+
+ if (pos->mmap_type == mmo->mmap_type) {
+ spin_unlock(&obj->mmo.lock);
+ drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node);
+ kfree(mmo);
+ return pos;
+ }
+
+ if (pos->mmap_type < mmo->mmap_type)
+ p = &rb->rb_right;
+ else
+ p = &rb->rb_left;
+ }
+ rb_link_node(&mmo->offset, rb, p);
+ rb_insert_color(&mmo->offset, &obj->mmo.offsets);
+ spin_unlock(&obj->mmo.lock);
+
+ return mmo;
+}
+
+static struct i915_mmap_offset *
+mmap_offset_attach(struct drm_i915_gem_object *obj,
+ enum i915_mmap_type mmap_type,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_mmap_offset *mmo;
+ int err;
+
+ GEM_BUG_ON(obj->ops->mmap_offset || obj->ops->mmap_ops);
+
+ mmo = lookup_mmo(obj, mmap_type);
+ if (mmo)
+ goto out;
+
+ mmo = kmalloc(sizeof(*mmo), GFP_KERNEL);
+ if (!mmo)
+ return ERR_PTR(-ENOMEM);
+
+ mmo->obj = obj;
+ mmo->mmap_type = mmap_type;
+ drm_vma_node_reset(&mmo->vma_node);
+
+ err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node, obj->base.size / PAGE_SIZE);
+ if (likely(!err))
+ goto insert;
+
+ /* Attempt to reap some mmap space from dead objects */
+ err = intel_gt_retire_requests_timeout(to_gt(i915), MAX_SCHEDULE_TIMEOUT,
+ NULL);
+ if (err)
+ goto err;
+
+ i915_gem_drain_freed_objects(i915);
+ err = drm_vma_offset_add(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node, obj->base.size / PAGE_SIZE);
+ if (err)
+ goto err;
+
+insert:
+ mmo = insert_mmo(obj, mmo);
+ GEM_BUG_ON(lookup_mmo(obj, mmap_type) != mmo);
+out:
+ if (file)
+ drm_vma_node_allow_once(&mmo->vma_node, file);
+ return mmo;
+
+err:
+ kfree(mmo);
+ return ERR_PTR(err);
+}
+
+static int
+__assign_mmap_offset(struct drm_i915_gem_object *obj,
+ enum i915_mmap_type mmap_type,
+ u64 *offset, struct drm_file *file)
+{
+ struct i915_mmap_offset *mmo;
+
+ if (i915_gem_object_never_mmap(obj))
+ return -ENODEV;
+
+ if (obj->ops->mmap_offset) {
+ if (mmap_type != I915_MMAP_TYPE_FIXED)
+ return -ENODEV;
+
+ *offset = obj->ops->mmap_offset(obj);
+ return 0;
+ }
+
+ if (mmap_type == I915_MMAP_TYPE_FIXED)
+ return -ENODEV;
+
+ if (mmap_type != I915_MMAP_TYPE_GTT &&
+ !i915_gem_object_has_struct_page(obj) &&
+ !i915_gem_object_has_iomem(obj))
+ return -ENODEV;
+
+ mmo = mmap_offset_attach(obj, mmap_type, file);
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
+
+ *offset = drm_vma_node_offset_addr(&mmo->vma_node);
+ return 0;
+}
+
+static int
+__assign_mmap_offset_handle(struct drm_file *file,
+ u32 handle,
+ enum i915_mmap_type mmap_type,
+ u64 *offset)
+{
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = i915_gem_object_lookup(file, handle);
+ if (!obj)
+ return -ENOENT;
+
+ err = i915_gem_object_lock_interruptible(obj, NULL);
+ if (err)
+ goto out_put;
+ err = __assign_mmap_offset(obj, mmap_type, offset, file);
+ i915_gem_object_unlock(obj);
+out_put:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+int
+i915_gem_dumb_mmap_offset(struct drm_file *file,
+ struct drm_device *dev,
+ u32 handle,
+ u64 *offset)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ enum i915_mmap_type mmap_type;
+
+ if (HAS_LMEM(to_i915(dev)))
+ mmap_type = I915_MMAP_TYPE_FIXED;
+ else if (pat_enabled())
+ mmap_type = I915_MMAP_TYPE_WC;
+ else if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
+ return -ENODEV;
+ else
+ mmap_type = I915_MMAP_TYPE_GTT;
+
+ return __assign_mmap_offset_handle(file, handle, mmap_type, offset);
+}
+
+/**
+ * i915_gem_mmap_offset_ioctl - prepare an object for GTT mmap'ing
+ * @dev: DRM device
+ * @data: GTT mapping ioctl data
+ * @file: GEM object info
+ *
+ * Simply returns the fake offset to userspace so it can mmap it.
+ * The mmap call will end up in drm_gem_mmap(), which will set things
+ * up so we can get faults in the handler above.
+ *
+ * The fault handler will take care of binding the object into the GTT
+ * (since it may have been evicted to make room for something), allocating
+ * a fence register, and mapping the appropriate aperture address into
+ * userspace.
+ */
+int
+i915_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct drm_i915_gem_mmap_offset *args = data;
+ enum i915_mmap_type type;
+ int err;
+
+ /*
+ * Historically we failed to check args.pad and args.offset
+ * and so we cannot use those fields for user input and we cannot
+ * add -EINVAL for them as the ABI is fixed, i.e. old userspace
+ * may be feeding in garbage in those fields.
+ *
+	 * if (args->pad) return -EINVAL; is verboten!
+ */
+
+ err = i915_user_extensions(u64_to_user_ptr(args->extensions),
+ NULL, 0, NULL);
+ if (err)
+ return err;
+
+ switch (args->flags) {
+ case I915_MMAP_OFFSET_GTT:
+ if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
+ return -ENODEV;
+ type = I915_MMAP_TYPE_GTT;
+ break;
+
+ case I915_MMAP_OFFSET_WC:
+ if (!pat_enabled())
+ return -ENODEV;
+ type = I915_MMAP_TYPE_WC;
+ break;
+
+ case I915_MMAP_OFFSET_WB:
+ type = I915_MMAP_TYPE_WB;
+ break;
+
+ case I915_MMAP_OFFSET_UC:
+ if (!pat_enabled())
+ return -ENODEV;
+ type = I915_MMAP_TYPE_UC;
+ break;
+
+ case I915_MMAP_OFFSET_FIXED:
+ type = I915_MMAP_TYPE_FIXED;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return __assign_mmap_offset_handle(file, args->handle, type, &args->offset);
+}
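+
+/*
+ * Illustrative userspace usage (a hedged sketch, not part of this file):
+ * the returned fake offset is fed straight into mmap() on the DRM fd:
+ *
+ *	struct drm_i915_gem_mmap_offset args = {
+ *		.handle = handle,
+ *		.flags = I915_MMAP_OFFSET_WC,
+ *	};
+ *	void *ptr;
+ *
+ *	drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &args);
+ *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ *		   fd, args.offset);
+ *
+ * handle and size stand in for application state; objects backed by TTM
+ * only accept I915_MMAP_OFFSET_FIXED, as enforced in
+ * __assign_mmap_offset().
+ */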
+
+static void vm_open(struct vm_area_struct *vma)
+{
+ struct i915_mmap_offset *mmo = vma->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+
+ GEM_BUG_ON(!obj);
+ i915_gem_object_get(obj);
+}
+
+static void vm_close(struct vm_area_struct *vma)
+{
+ struct i915_mmap_offset *mmo = vma->vm_private_data;
+ struct drm_i915_gem_object *obj = mmo->obj;
+
+ GEM_BUG_ON(!obj);
+ i915_gem_object_put(obj);
+}
+
+static const struct vm_operations_struct vm_ops_gtt = {
+ .fault = vm_fault_gtt,
+ .access = vm_access,
+ .open = vm_open,
+ .close = vm_close,
+};
+
+static const struct vm_operations_struct vm_ops_cpu = {
+ .fault = vm_fault_cpu,
+ .access = vm_access,
+ .open = vm_open,
+ .close = vm_close,
+};
+
+static int singleton_release(struct inode *inode, struct file *file)
+{
+ struct drm_i915_private *i915 = file->private_data;
+
+ cmpxchg(&i915->gem.mmap_singleton, file, NULL);
+ drm_dev_put(&i915->drm);
+
+ return 0;
+}
+
+static const struct file_operations singleton_fops = {
+ .owner = THIS_MODULE,
+ .release = singleton_release,
+};
+
+static struct file *mmap_singleton(struct drm_i915_private *i915)
+{
+ struct file *file;
+
+ rcu_read_lock();
+ file = READ_ONCE(i915->gem.mmap_singleton);
+ if (file && !get_file_rcu(file))
+ file = NULL;
+ rcu_read_unlock();
+ if (file)
+ return file;
+
+ file = anon_inode_getfile("i915.gem", &singleton_fops, i915, O_RDWR);
+ if (IS_ERR(file))
+ return file;
+
+ /* Everyone shares a single global address space */
+ file->f_mapping = i915->drm.anon_inode->i_mapping;
+
+ smp_store_mb(i915->gem.mmap_singleton, file);
+ drm_dev_get(&i915->drm);
+
+ return file;
+}
+
+static int
+i915_gem_object_mmap(struct drm_i915_gem_object *obj,
+ struct i915_mmap_offset *mmo,
+ struct vm_area_struct *vma)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct drm_device *dev = &i915->drm;
+ struct file *anon;
+
+ if (i915_gem_object_is_readonly(obj)) {
+ if (vma->vm_flags & VM_WRITE) {
+ i915_gem_object_put(obj);
+ return -EINVAL;
+ }
+ vm_flags_clear(vma, VM_MAYWRITE);
+ }
+
+ anon = mmap_singleton(to_i915(dev));
+ if (IS_ERR(anon)) {
+ i915_gem_object_put(obj);
+ return PTR_ERR(anon);
+ }
+
+ vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_IO);
+
+ /*
+ * We keep the ref on mmo->obj, not vm_file, but we require
+ * vma->vm_file->f_mapping, see vma_link(), for later revocation.
+ * Our userspace is accustomed to having per-file resource cleanup
+ * (i.e. contexts, objects and requests) on their close(fd), which
+ * requires avoiding extraneous references to their filp, hence why
+ * we prefer to use an anonymous file for their mmaps.
+ */
+ vma_set_file(vma, anon);
+ /* Drop the initial creation reference, the vma is now holding one. */
+ fput(anon);
+
+ if (obj->ops->mmap_ops) {
+ vma->vm_page_prot = pgprot_decrypted(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = obj->ops->mmap_ops;
+ vma->vm_private_data = obj->base.vma_node.driver_private;
+ return 0;
+ }
+
+ vma->vm_private_data = mmo;
+
+ switch (mmo->mmap_type) {
+ case I915_MMAP_TYPE_WC:
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = &vm_ops_cpu;
+ break;
+
+ case I915_MMAP_TYPE_FIXED:
+ GEM_WARN_ON(1);
+ fallthrough;
+ case I915_MMAP_TYPE_WB:
+ vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+ vma->vm_ops = &vm_ops_cpu;
+ break;
+
+ case I915_MMAP_TYPE_UC:
+ vma->vm_page_prot =
+ pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = &vm_ops_cpu;
+ break;
+
+ case I915_MMAP_TYPE_GTT:
+ vma->vm_page_prot =
+ pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+ vma->vm_ops = &vm_ops_gtt;
+ break;
+ }
+ vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
+
+ return 0;
+}
+
+/*
+ * This overcomes the limitation in drm_gem_mmap's assignment of a
+ * drm_gem_object as the vma->vm_private_data, since we need to
+ * be able to resolve multiple mmap offsets which could be tied
+ * to a single gem object.
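+ *
+ * For reference, a hypothetical userspace caller reaches this hook by
+ * requesting a fake offset for a handle and then mmap()ing the DRM fd at
+ * that offset (illustrative sketch only; handle, size and drm_fd are
+ * assumed caller state, error handling elided):
+ *
+ *	struct drm_i915_gem_mmap_offset arg = {
+ *		.handle = handle,
+ *		.flags = I915_MMAP_OFFSET_WB,
+ *	};
+ *	ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &arg);
+ *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ *		   drm_fd, arg.offset);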
+ */
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct drm_vma_offset_node *node;
+ struct drm_file *priv = filp->private_data;
+ struct drm_device *dev = priv->minor->dev;
+ struct drm_i915_gem_object *obj = NULL;
+ struct i915_mmap_offset *mmo = NULL;
+
+ if (drm_dev_is_unplugged(dev))
+ return -ENODEV;
+
+ rcu_read_lock();
+ drm_vma_offset_lock_lookup(dev->vma_offset_manager);
+ node = drm_vma_offset_exact_lookup_locked(dev->vma_offset_manager,
+ vma->vm_pgoff,
+ vma_pages(vma));
+ if (node && drm_vma_node_is_allowed(node, priv)) {
+ /*
+ * Skip 0-refcnted objects as they are in the process of being
+ * destroyed and will be invalid when the vma manager lock
+ * is released.
+ */
+ if (!node->driver_private) {
+ mmo = container_of(node, struct i915_mmap_offset, vma_node);
+ obj = i915_gem_object_get_rcu(mmo->obj);
+
+ GEM_BUG_ON(obj && obj->ops->mmap_ops);
+ } else {
+ obj = i915_gem_object_get_rcu
+ (container_of(node, struct drm_i915_gem_object,
+ base.vma_node));
+
+ GEM_BUG_ON(obj && !obj->ops->mmap_ops);
+ }
+ }
+ drm_vma_offset_unlock_lookup(dev->vma_offset_manager);
+ rcu_read_unlock();
+ if (!obj)
+ return node ? -EACCES : -EINVAL;
+
+ return i915_gem_object_mmap(obj, mmo, vma);
+}
+
+int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct drm_device *dev = &i915->drm;
+ struct i915_mmap_offset *mmo = NULL;
+ enum i915_mmap_type mmap_type;
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+
+ if (drm_dev_is_unplugged(dev))
+ return -ENODEV;
+
+ /* handle ttm object */
+ if (obj->ops->mmap_ops) {
+ /*
+ * ttm fault handler, ttm_bo_vm_fault_reserved() uses fake offset
+ * to calculate page offset so set that up.
+ */
+ vma->vm_pgoff += drm_vma_node_start(&obj->base.vma_node);
+ } else {
+ /* handle stolen and smem objects */
+ mmap_type = i915_ggtt_has_aperture(ggtt) ? I915_MMAP_TYPE_GTT : I915_MMAP_TYPE_WC;
+ mmo = mmap_offset_attach(obj, mmap_type, NULL);
+ if (IS_ERR(mmo))
+ return PTR_ERR(mmo);
+ }
+
+ /*
+ * When we install vm_ops for mmap we are too late for
+ * the vm_ops->open() which increases the ref_count of
+ * this obj and then it gets decreased by the vm_ops->close().
+ * To balance this, increase the obj ref_count here.
+ */
+ obj = i915_gem_object_get(obj);
+ return i915_gem_object_mmap(obj, mmo, vma);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_mman.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.h b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
new file mode 100644
index 0000000000..196417fd0f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.h
@@ -0,0 +1,33 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_MMAN_H__
+#define __I915_GEM_MMAN_H__
+
+#include <linux/mm_types.h>
+#include <linux/types.h>
+
+struct drm_device;
+struct drm_file;
+struct drm_i915_gem_object;
+struct file;
+struct i915_mmap_offset;
+struct mutex;
+
+int i915_gem_mmap_gtt_version(void);
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma);
+
+int i915_gem_dumb_mmap_offset(struct drm_file *file_priv,
+ struct drm_device *dev,
+ u32 handle, u64 *offset);
+
+void __i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap_gtt(struct drm_i915_gem_object *obj);
+
+void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *obj);
+void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj);
+int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma);
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
new file mode 100644
index 0000000000..ef9346ed6d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -0,0 +1,967 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <linux/highmem.h>
+#include <linux/sched/mm.h>
+
+#include <drm/drm_cache.h>
+
+#include "display/intel_frontbuffer.h"
+#include "pxp/intel_pxp.h"
+
+#include "i915_drv.h"
+#include "i915_file_private.h"
+#include "i915_gem_clflush.h"
+#include "i915_gem_context.h"
+#include "i915_gem_dmabuf.h"
+#include "i915_gem_mman.h"
+#include "i915_gem_object.h"
+#include "i915_gem_ttm.h"
+#include "i915_memcpy.h"
+#include "i915_trace.h"
+
+static struct kmem_cache *slab_objects;
+
+static const struct drm_gem_object_funcs i915_gem_object_funcs;
+
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+ enum i915_cache_level level)
+{
+ if (drm_WARN_ON(&i915->drm, level >= I915_MAX_CACHE_LEVEL))
+ return 0;
+
+ return INTEL_INFO(i915)->cachelevel_to_pat[level];
+}
+
+bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
+ enum i915_cache_level lvl)
+{
+ /*
+ * In case the pat_index is set by user space, this kernel mode
+ * driver should leave the coherency to be managed by user space,
+ * simply return true here.
+ */
+ if (obj->pat_set_by_user)
+ return true;
+
+ /*
+ * Otherwise the pat_index should have been converted from cache_level
+ * so that the following comparison is valid.
+ */
+ return obj->pat_index == i915_gem_get_pat_index(obj_to_i915(obj), lvl);
+}
+
+struct drm_i915_gem_object *i915_gem_object_alloc(void)
+{
+ struct drm_i915_gem_object *obj;
+
+ obj = kmem_cache_zalloc(slab_objects, GFP_KERNEL);
+ if (!obj)
+ return NULL;
+ obj->base.funcs = &i915_gem_object_funcs;
+
+ return obj;
+}
+
+void i915_gem_object_free(struct drm_i915_gem_object *obj)
+{
+ return kmem_cache_free(slab_objects, obj);
+}
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_object_ops *ops,
+ struct lock_class_key *key, unsigned flags)
+{
+ /*
+ * A gem object is embedded both in a struct ttm_buffer_object :/ and
+ * in a drm_i915_gem_object. Make sure they are aliased.
+ */
+ BUILD_BUG_ON(offsetof(typeof(*obj), base) !=
+ offsetof(typeof(*obj), __do_not_access.base));
+
+ spin_lock_init(&obj->vma.lock);
+ INIT_LIST_HEAD(&obj->vma.list);
+
+ INIT_LIST_HEAD(&obj->mm.link);
+
+ INIT_LIST_HEAD(&obj->lut_list);
+ spin_lock_init(&obj->lut_lock);
+
+ spin_lock_init(&obj->mmo.lock);
+ obj->mmo.offsets = RB_ROOT;
+
+ init_rcu_head(&obj->rcu);
+
+ obj->ops = ops;
+ GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
+ obj->flags = flags;
+
+ obj->mm.madv = I915_MADV_WILLNEED;
+ INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
+ mutex_init(&obj->mm.get_page.lock);
+ INIT_RADIX_TREE(&obj->mm.get_dma_page.radix, GFP_KERNEL | __GFP_NOWARN);
+ mutex_init(&obj->mm.get_dma_page.lock);
+}
+
+/**
+ * __i915_gem_object_fini - Clean up a GEM object initialization
+ * @obj: The gem object to cleanup
+ *
+ * This function cleans up gem object fields that are set up by
+ * drm_gem_private_object_init() and i915_gem_object_init().
+ * It's primarily intended as a helper for backends that need to
+ * clean up the gem object in separate steps.
+ */
+void __i915_gem_object_fini(struct drm_i915_gem_object *obj)
+{
+ mutex_destroy(&obj->mm.get_page.lock);
+ mutex_destroy(&obj->mm.get_dma_page.lock);
+ dma_resv_fini(&obj->base._resv);
+}
+
+/**
+ * i915_gem_object_set_cache_coherency - Mark up the object's coherency levels
+ * for a given cache_level
+ * @obj: #drm_i915_gem_object
+ * @cache_level: cache level
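+ *
+ * Backends typically call this once at object creation time; a minimal,
+ * illustrative sketch (i915 being the caller's device pointer):
+ *
+ *	i915_gem_object_set_cache_coherency(obj, HAS_LLC(i915) ?
+ *					    I915_CACHE_LLC : I915_CACHE_NONE);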
+ */
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ obj->pat_index = i915_gem_get_pat_index(i915, cache_level);
+
+ if (cache_level != I915_CACHE_NONE)
+ obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
+ I915_BO_CACHE_COHERENT_FOR_WRITE);
+ else if (HAS_LLC(i915))
+ obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
+ else
+ obj->cache_coherent = 0;
+
+ obj->cache_dirty =
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
+ !IS_DGFX(i915);
+}
+
+/**
+ * i915_gem_object_set_pat_index - set PAT index to be used in PTE encode
+ * @obj: #drm_i915_gem_object
+ * @pat_index: PAT index
+ *
+ * This is a clone of i915_gem_object_set_cache_coherency taking pat index
+ * instead of cache_level as its second argument.
+ */
+void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj,
+ unsigned int pat_index)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ if (obj->pat_index == pat_index)
+ return;
+
+ obj->pat_index = pat_index;
+
+ if (pat_index != i915_gem_get_pat_index(i915, I915_CACHE_NONE))
+ obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
+ I915_BO_CACHE_COHERENT_FOR_WRITE);
+ else if (HAS_LLC(i915))
+ obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
+ else
+ obj->cache_coherent = 0;
+
+ obj->cache_dirty =
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE) &&
+ !IS_DGFX(i915);
+}
+
+bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ /*
+ * This is purely from a security perspective, so we simply don't care
+ * about non-userspace objects being able to bypass the LLC.
+ */
+ if (!(obj->flags & I915_BO_ALLOC_USER))
+ return false;
+
+ /*
+ * Always flush cache for UMD objects at creation time.
+ */
+ if (obj->pat_set_by_user)
+ return true;
+
+ /*
+ * EHL and JSL add the 'Bypass LLC' MOCS entry, which should make it
+ * possible for userspace to bypass the GTT caching bits set by the
+ * kernel, as per the given object cache_level. This is troublesome
+ * since the heavy flush we apply when first gathering the pages is
+ * skipped if the kernel thinks the object is coherent with the GPU. As
+ * a result it might be possible to bypass the cache and read the
+ * contents of the page directly, which could be stale data. If it's
+ * just a case of userspace shooting themselves in the foot then so be
+ * it, but since i915 takes the stance of always zeroing memory before
+ * handing it to userspace, we need to prevent this.
+ */
+ return (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915));
+}
+
+static void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
+{
+ struct drm_i915_gem_object *obj = to_intel_bo(gem);
+ struct drm_i915_file_private *fpriv = file->driver_priv;
+ struct i915_lut_handle bookmark = {};
+ struct i915_mmap_offset *mmo, *mn;
+ struct i915_lut_handle *lut, *ln;
+ LIST_HEAD(close);
+
+ spin_lock(&obj->lut_lock);
+ list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
+ struct i915_gem_context *ctx = lut->ctx;
+
+ if (ctx && ctx->file_priv == fpriv) {
+ i915_gem_context_get(ctx);
+ list_move(&lut->obj_link, &close);
+ }
+
+ /* Break long locks, and carefully continue on from this spot */
+ if (&ln->obj_link != &obj->lut_list) {
+ list_add_tail(&bookmark.obj_link, &ln->obj_link);
+ if (cond_resched_lock(&obj->lut_lock))
+ list_safe_reset_next(&bookmark, ln, obj_link);
+ __list_del_entry(&bookmark.obj_link);
+ }
+ }
+ spin_unlock(&obj->lut_lock);
+
+ spin_lock(&obj->mmo.lock);
+ rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset)
+ drm_vma_node_revoke(&mmo->vma_node, file);
+ spin_unlock(&obj->mmo.lock);
+
+ list_for_each_entry_safe(lut, ln, &close, obj_link) {
+ struct i915_gem_context *ctx = lut->ctx;
+ struct i915_vma *vma;
+
+ /*
+ * We allow the process to have multiple handles to the same
+ * vma, in the same fd namespace, by virtue of flink/open.
+ */
+
+ mutex_lock(&ctx->lut_mutex);
+ vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
+ if (vma) {
+ GEM_BUG_ON(vma->obj != obj);
+ GEM_BUG_ON(!atomic_read(&vma->open_count));
+ i915_vma_close(vma);
+ }
+ mutex_unlock(&ctx->lut_mutex);
+
+ i915_gem_context_put(lut->ctx);
+ i915_lut_handle_free(lut);
+ i915_gem_object_put(obj);
+ }
+}
+
+void __i915_gem_free_object_rcu(struct rcu_head *head)
+{
+ struct drm_i915_gem_object *obj =
+ container_of(head, typeof(*obj), rcu);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ i915_gem_object_free(obj);
+
+ GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
+ atomic_dec(&i915->mm.free_count);
+}
+
+static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
+{
+ /* Skip serialisation and waking the device if known to be not used. */
+
+ if (obj->userfault_count && !IS_DGFX(to_i915(obj->base.dev)))
+ i915_gem_object_release_mmap_gtt(obj);
+
+ if (!RB_EMPTY_ROOT(&obj->mmo.offsets)) {
+ struct i915_mmap_offset *mmo, *mn;
+
+ i915_gem_object_release_mmap_offset(obj);
+
+ rbtree_postorder_for_each_entry_safe(mmo, mn,
+ &obj->mmo.offsets,
+ offset) {
+ drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
+ &mmo->vma_node);
+ kfree(mmo);
+ }
+ obj->mmo.offsets = RB_ROOT;
+ }
+}
+
+/**
+ * __i915_gem_object_pages_fini - Clean up pages use of a gem object
+ * @obj: The gem object to clean up
+ *
+ * This function cleans up usage of the object mm.pages member. It
+ * is intended for backends that need to clean up a gem object in
+ * separate steps and needs to be called when the object is idle before
+ * the object's backing memory is freed.
+ */
+void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj)
+{
+ assert_object_held_shared(obj);
+
+ if (!list_empty(&obj->vma.list)) {
+ struct i915_vma *vma;
+
+ spin_lock(&obj->vma.lock);
+ while ((vma = list_first_entry_or_null(&obj->vma.list,
+ struct i915_vma,
+ obj_link))) {
+ GEM_BUG_ON(vma->obj != obj);
+ spin_unlock(&obj->vma.lock);
+
+ i915_vma_destroy(vma);
+
+ spin_lock(&obj->vma.lock);
+ }
+ spin_unlock(&obj->vma.lock);
+ }
+
+ __i915_gem_object_free_mmaps(obj);
+
+ atomic_set(&obj->mm.pages_pin_count, 0);
+
+ /*
+ * dma_buf_unmap_attachment() requires reservation to be
+ * locked. The imported GEM shouldn't share reservation lock
+ * and ttm_bo_cleanup_memtype_use() shouldn't be invoked for
+ * dma-buf, so it's safe to take the lock.
+ */
+ if (obj->base.import_attach)
+ i915_gem_object_lock(obj, NULL);
+
+ __i915_gem_object_put_pages(obj);
+
+ if (obj->base.import_attach)
+ i915_gem_object_unlock(obj);
+
+ GEM_BUG_ON(i915_gem_object_has_pages(obj));
+}
+
+void __i915_gem_free_object(struct drm_i915_gem_object *obj)
+{
+ trace_i915_gem_object_destroy(obj);
+
+ GEM_BUG_ON(!list_empty(&obj->lut_list));
+
+ bitmap_free(obj->bit_17);
+
+ if (obj->base.import_attach)
+ drm_prime_gem_destroy(&obj->base, NULL);
+
+ drm_gem_free_mmap_offset(&obj->base);
+
+ if (obj->ops->release)
+ obj->ops->release(obj);
+
+ if (obj->mm.n_placements > 1)
+ kfree(obj->mm.placements);
+
+ if (obj->shares_resv_from)
+ i915_vm_resv_put(obj->shares_resv_from);
+
+ __i915_gem_object_fini(obj);
+}
+
+static void __i915_gem_free_objects(struct drm_i915_private *i915,
+ struct llist_node *freed)
+{
+ struct drm_i915_gem_object *obj, *on;
+
+ llist_for_each_entry_safe(obj, on, freed, freed) {
+ might_sleep();
+ if (obj->ops->delayed_free) {
+ obj->ops->delayed_free(obj);
+ continue;
+ }
+
+ __i915_gem_object_pages_fini(obj);
+ __i915_gem_free_object(obj);
+
+ /* But keep the pointer alive for RCU-protected lookups */
+ call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+ cond_resched();
+ }
+}
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915)
+{
+ struct llist_node *freed = llist_del_all(&i915->mm.free_list);
+
+ if (unlikely(freed))
+ __i915_gem_free_objects(i915, freed);
+}
+
+static void __i915_gem_free_work(struct work_struct *work)
+{
+ struct drm_i915_private *i915 =
+ container_of(work, struct drm_i915_private, mm.free_work);
+
+ i915_gem_flush_free_objects(i915);
+}
+
+static void i915_gem_free_object(struct drm_gem_object *gem_obj)
+{
+ struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
+
+ /*
+ * Before we free the object, make sure any pure RCU-only
+ * read-side critical sections are complete, e.g.
+ * i915_gem_busy_ioctl(). For the corresponding synchronized
+ * lookup see i915_gem_object_lookup_rcu().
+ */
+ atomic_inc(&i915->mm.free_count);
+
+ /*
+ * Since we require blocking on struct_mutex to unbind the freed
+ * object from the GPU before releasing resources back to the
+ * system, we can not do that directly from the RCU callback (which may
+ * be a softirq context), but must instead then defer that work onto a
+ * kthread. We use the RCU callback rather than move the freed object
+ * directly onto the work queue so that we can mix between using the
+ * worker and performing frees directly from subsequent allocations for
+ * crude but effective memory throttling.
+ */
+
+ if (llist_add(&obj->freed, &i915->mm.free_list))
+ queue_work(i915->wq, &i915->mm.free_work);
+}
+
+void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ struct intel_frontbuffer *front;
+
+ front = i915_gem_object_get_frontbuffer(obj);
+ if (front) {
+ intel_frontbuffer_flush(front, origin);
+ intel_frontbuffer_put(front);
+ }
+}
+
+void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ struct intel_frontbuffer *front;
+
+ front = i915_gem_object_get_frontbuffer(obj);
+ if (front) {
+ intel_frontbuffer_invalidate(front, origin);
+ intel_frontbuffer_put(front);
+ }
+}
+
+static void
+i915_gem_object_read_from_page_kmap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
+{
+ pgoff_t idx = offset >> PAGE_SHIFT;
+ void *src_map;
+ void *src_ptr;
+
+ src_map = kmap_atomic(i915_gem_object_get_page(obj, idx));
+
+ src_ptr = src_map + offset_in_page(offset);
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+ drm_clflush_virt_range(src_ptr, size);
+ memcpy(dst, src_ptr, size);
+
+ kunmap_atomic(src_map);
+}
+
+static void
+i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
+{
+ pgoff_t idx = offset >> PAGE_SHIFT;
+ dma_addr_t dma = i915_gem_object_get_dma_address(obj, idx);
+ void __iomem *src_map;
+ void __iomem *src_ptr;
+
+ src_map = io_mapping_map_wc(&obj->mm.region->iomap,
+ dma - obj->mm.region->region.start,
+ PAGE_SIZE);
+
+ src_ptr = src_map + offset_in_page(offset);
+ if (!i915_memcpy_from_wc(dst, (void __force *)src_ptr, size))
+ memcpy_fromio(dst, src_ptr, size);
+
+ io_mapping_unmap(src_map);
+}
+
+static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!i915_gem_object_has_iomem(obj));
+
+ if (IS_DGFX(to_i915(obj->base.dev)))
+ return i915_ttm_resource_mappable(i915_gem_to_ttm(obj)->resource);
+
+ return true;
+}
+
+/**
+ * i915_gem_object_read_from_page - read data from the page of a GEM object
+ * @obj: GEM object to read from
+ * @offset: offset within the object
+ * @dst: buffer to store the read data
+ * @size: size to read
+ *
+ * Reads data from @obj at the specified offset. The requested region to read
+ * from can't cross a page boundary. The caller must ensure that @obj pages
+ * are pinned and that @obj is synced wrt. any related writes.
+ *
+ * Return: %0 on success or -ENODEV if the type of @obj's backing store is
+ * unsupported.
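+ *
+ * Illustrative sketch of a caller reading a 32-bit value (offset is
+ * assumed caller state and assumed not to cross a page boundary; pages
+ * already pinned):
+ *
+ *	u32 val;
+ *	int err;
+ *
+ *	err = i915_gem_object_read_from_page(obj, offset, &val, sizeof(val));
+ *	if (err)
+ *		return err;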
+ */
+int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size)
+{
+ GEM_BUG_ON(overflows_type(offset >> PAGE_SHIFT, pgoff_t));
+ GEM_BUG_ON(offset >= obj->base.size);
+ GEM_BUG_ON(offset_in_page(offset) > PAGE_SIZE - size);
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+ if (i915_gem_object_has_struct_page(obj))
+ i915_gem_object_read_from_page_kmap(obj, offset, dst, size);
+ else if (i915_gem_object_has_iomem(obj) && object_has_mappable_iomem(obj))
+ i915_gem_object_read_from_page_iomap(obj, offset, dst, size);
+ else
+ return -ENODEV;
+
+ return 0;
+}
+
+/**
+ * i915_gem_object_evictable - Whether object is likely evictable after unbind.
+ * @obj: The object to check
+ *
+ * This function checks whether the object is likely evictable after unbind.
+ * If the object is not locked when checking, the result is only advisory.
+ * If the object is locked when checking, and the function returns true,
+ * then an eviction should indeed be possible. But since unlocked vma
+ * unpinning and unbinding is currently possible, the object can actually
+ * become evictable even if this function returns false.
+ *
+ * Return: true if the object may be evictable. False otherwise.
+ */
+bool i915_gem_object_evictable(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+ int pin_count = atomic_read(&obj->mm.pages_pin_count);
+
+ if (!pin_count)
+ return true;
+
+ spin_lock(&obj->vma.lock);
+ list_for_each_entry(vma, &obj->vma.list, obj_link) {
+ if (i915_vma_is_pinned(vma)) {
+ spin_unlock(&obj->vma.lock);
+ return false;
+ }
+ if (atomic_read(&vma->pages_count))
+ pin_count--;
+ }
+ spin_unlock(&obj->vma.lock);
+ GEM_WARN_ON(pin_count < 0);
+
+ return pin_count == 0;
+}
+
+/**
+ * i915_gem_object_migratable - Whether the object is migratable out of the
+ * current region.
+ * @obj: Pointer to the object.
+ *
+ * Return: Whether the object is allowed to be resident in regions
+ * other than the current one while pages are present.
+ */
+bool i915_gem_object_migratable(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mr = READ_ONCE(obj->mm.region);
+
+ if (!mr)
+ return false;
+
+ return obj->mm.n_placements > 1;
+}
+
+/**
+ * i915_gem_object_has_struct_page - Whether the object is page-backed
+ * @obj: The object to query.
+ *
+ * This function should only be called while the object is locked or pinned,
+ * otherwise the page backing may change under the caller.
+ *
+ * Return: True if page-backed, false otherwise.
+ */
+bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj)
+{
+#ifdef CONFIG_LOCKDEP
+ if (IS_DGFX(to_i915(obj->base.dev)) &&
+ i915_gem_object_evictable((void __force *)obj))
+ assert_object_held_shared(obj);
+#endif
+ return obj->mem_flags & I915_BO_FLAG_STRUCT_PAGE;
+}
+
+/**
+ * i915_gem_object_has_iomem - Whether the object is iomem-backed
+ * @obj: The object to query.
+ *
+ * This function should only be called while the object is locked or pinned,
+ * otherwise the iomem backing may change under the caller.
+ *
+ * Return: True if iomem-backed, false otherwise.
+ */
+bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj)
+{
+#ifdef CONFIG_LOCKDEP
+ if (IS_DGFX(to_i915(obj->base.dev)) &&
+ i915_gem_object_evictable((void __force *)obj))
+ assert_object_held_shared(obj);
+#endif
+ return obj->mem_flags & I915_BO_FLAG_IOMEM;
+}
+
+/**
+ * i915_gem_object_can_migrate - Whether an object likely can be migrated
+ *
+ * @obj: The object to migrate
+ * @id: The region intended to migrate to
+ *
+ * Check whether the object backend supports migration to the
+ * given region. Note that pinning may affect the ability to migrate as
+ * returned by this function.
+ *
+ * This function is primarily intended as a helper for checking the
+ * possibility to migrate objects and might be slightly less permissive
+ * than i915_gem_object_migrate() when it comes to objects with the
+ * I915_BO_ALLOC_USER flag set.
+ *
+ * Return: true if migration is possible, false otherwise.
+ */
+bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj,
+ enum intel_region_id id)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ unsigned int num_allowed = obj->mm.n_placements;
+ struct intel_memory_region *mr;
+ unsigned int i;
+
+ GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN);
+ GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED);
+
+ mr = i915->mm.regions[id];
+ if (!mr)
+ return false;
+
+ if (!IS_ALIGNED(obj->base.size, mr->min_page_size))
+ return false;
+
+ if (obj->mm.region == mr)
+ return true;
+
+ if (!i915_gem_object_evictable(obj))
+ return false;
+
+ if (!obj->ops->migrate)
+ return false;
+
+ if (!(obj->flags & I915_BO_ALLOC_USER))
+ return true;
+
+ if (num_allowed == 0)
+ return false;
+
+ for (i = 0; i < num_allowed; ++i) {
+ if (mr == obj->mm.placements[i])
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * i915_gem_object_migrate - Migrate an object to the desired region id
+ * @obj: The object to migrate.
+ * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may
+ * not be successful in evicting other objects to make room for this object.
+ * @id: The region id to migrate to.
+ *
+ * Attempt to migrate the object to the desired memory region. The
+ * object backend must support migration and the object may not be
+ * pinned, (explicitly pinned pages or pinned vmas). The object must
+ * be locked.
+ * On successful completion, the object will have pages pointing to
+ * memory in the new region, but an async migration task may not have
+ * completed yet, and to accomplish that, i915_gem_object_wait_migration()
+ * must be called.
+ *
+ * Note: the @ww parameter is not used yet, but included to make sure
+ * callers put some effort into obtaining a valid ww ctx if one is
+ * available.
+ *
+ * Return: 0 on success. Negative error code on failure. In particular may
+ * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance
+ * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and
+ * -EBUSY if the object is pinned.
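+ *
+ * A minimal sketch of the usual ww transaction around a migration
+ * (illustrative only; INTEL_REGION_SMEM is just an example target and
+ * error unwinding is elided):
+ *
+ *	struct i915_gem_ww_ctx ww;
+ *	int err;
+ *
+ *	for_i915_gem_ww(&ww, err, true) {
+ *		err = i915_gem_object_lock(obj, &ww);
+ *		if (err)
+ *			continue;
+ *		err = i915_gem_object_migrate(obj, &ww, INTEL_REGION_SMEM);
+ *		if (err)
+ *			continue;
+ *		err = i915_gem_object_wait_migration(obj, 0);
+ *	}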
+ */
+int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ enum intel_region_id id)
+{
+ return __i915_gem_object_migrate(obj, ww, id, obj->flags);
+}
+
+/**
+ * __i915_gem_object_migrate - Migrate an object to the desired region id, with
+ * control of the extra flags
+ * @obj: The object to migrate.
+ * @ww: An optional struct i915_gem_ww_ctx. If NULL, the backend may
+ * not be successful in evicting other objects to make room for this object.
+ * @id: The region id to migrate to.
+ * @flags: The object flags. Normally just obj->flags.
+ *
+ * Attempt to migrate the object to the desired memory region. The
+ * object backend must support migration and the object may not be
+ * pinned, (explicitly pinned pages or pinned vmas). The object must
+ * be locked.
+ * On successful completion, the object will have pages pointing to
+ * memory in the new region, but an async migration task may not have
+ * completed yet, and to accomplish that, i915_gem_object_wait_migration()
+ * must be called.
+ *
+ * Note: the @ww parameter is not used yet, but included to make sure
+ * callers put some effort into obtaining a valid ww ctx if one is
+ * available.
+ *
+ * Return: 0 on success. Negative error code on failure. In particular may
+ * return -ENXIO on lack of region space, -EDEADLK for deadlock avoidance
+ * if @ww is set, -EINTR or -ERESTARTSYS if signal pending, and
+ * -EBUSY if the object is pinned.
+ */
+int __i915_gem_object_migrate(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ enum intel_region_id id,
+ unsigned int flags)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_memory_region *mr;
+
+ GEM_BUG_ON(id >= INTEL_REGION_UNKNOWN);
+ GEM_BUG_ON(obj->mm.madv != I915_MADV_WILLNEED);
+ assert_object_held(obj);
+
+ mr = i915->mm.regions[id];
+ GEM_BUG_ON(!mr);
+
+ if (!i915_gem_object_can_migrate(obj, id))
+ return -EINVAL;
+
+ if (!obj->ops->migrate) {
+ if (GEM_WARN_ON(obj->mm.region != mr))
+ return -EINVAL;
+ return 0;
+ }
+
+ return obj->ops->migrate(obj, mr, flags);
+}
+
+/**
+ * i915_gem_object_placement_possible - Check whether the object can be
+ * placed at certain memory type
+ * @obj: Pointer to the object
+ * @type: The memory type to check
+ *
+ * Return: True if the object can be placed in @type. False otherwise.
+ */
+bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
+ enum intel_memory_type type)
+{
+ unsigned int i;
+
+ if (!obj->mm.n_placements) {
+ switch (type) {
+ case INTEL_MEMORY_LOCAL:
+ return i915_gem_object_has_iomem(obj);
+ case INTEL_MEMORY_SYSTEM:
+ return i915_gem_object_has_pages(obj);
+ default:
+ /* Ignore stolen for now */
+ GEM_BUG_ON(1);
+ return false;
+ }
+ }
+
+ for (i = 0; i < obj->mm.n_placements; i++) {
+ if (obj->mm.placements[i]->type == type)
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * i915_gem_object_needs_ccs_pages - Check whether the object requires extra
+ * pages when placed in system-memory, in order to save and later restore the
+ * flat-CCS aux state when the object is moved between local-memory and
+ * system-memory
+ * @obj: Pointer to the object
+ *
+ * Return: True if the object needs extra ccs pages. False otherwise.
+ */
+bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj)
+{
+ bool lmem_placement = false;
+ int i;
+
+ if (!HAS_FLAT_CCS(to_i915(obj->base.dev)))
+ return false;
+
+ if (obj->flags & I915_BO_ALLOC_CCS_AUX)
+ return true;
+
+ for (i = 0; i < obj->mm.n_placements; i++) {
+ /* Compression is not allowed for objects with smem placement */
+ if (obj->mm.placements[i]->type == INTEL_MEMORY_SYSTEM)
+ return false;
+ if (!lmem_placement &&
+ obj->mm.placements[i]->type == INTEL_MEMORY_LOCAL)
+ lmem_placement = true;
+ }
+
+ return lmem_placement;
+}
+
+void i915_gem_init__objects(struct drm_i915_private *i915)
+{
+ INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
+}
+
+void i915_objects_module_exit(void)
+{
+ kmem_cache_destroy(slab_objects);
+}
+
+int __init i915_objects_module_init(void)
+{
+ slab_objects = KMEM_CACHE(drm_i915_gem_object, SLAB_HWCACHE_ALIGN);
+ if (!slab_objects)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static const struct drm_gem_object_funcs i915_gem_object_funcs = {
+ .free = i915_gem_free_object,
+ .close = i915_gem_close_object,
+ .export = i915_gem_prime_export,
+};
+
+/**
+ * i915_gem_object_get_moving_fence - Get the object's moving fence if any
+ * @obj: The object whose moving fence to get.
+ * @fence: The resulting fence
+ *
+ * A non-signaled moving fence means that there is an async operation
+ * pending on the object that needs to be waited on before setting up
+ * any GPU- or CPU PTEs to the object's pages.
+ *
+ * Return: Negative error code or 0 for success.
+ */
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
+ struct dma_fence **fence)
+{
+ return dma_resv_get_singleton(obj->base.resv, DMA_RESV_USAGE_KERNEL,
+ fence);
+}
+
+/**
+ * i915_gem_object_wait_moving_fence - Wait for the object's moving fence if any
+ * @obj: The object whose moving fence to wait for.
+ * @intr: Whether to wait interruptible.
+ *
+ * If the moving fence signaled without an error, it is detached from the
+ * object and put.
+ *
+ * Return: 0 if successful, -ERESTARTSYS if the wait was interrupted,
+ * negative error code if the async operation represented by the
+ * moving fence failed.
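+ *
+ * Illustrative sketch of the usual pattern before setting up PTEs to the
+ * object's pages (object lock already held, error handling elided):
+ *
+ *	err = i915_gem_object_wait_moving_fence(obj, true);
+ *	if (err)
+ *		return err;
+ *
+ * after which the pages may be mapped into GPU or CPU page tables.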
+ */
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+ bool intr)
+{
+ long ret;
+
+ assert_object_held(obj);
+
+ ret = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_KERNEL,
+ intr, MAX_SCHEDULE_TIMEOUT);
+ if (!ret)
+ ret = -ETIME;
+ else if (ret > 0 && i915_gem_object_has_unknown_state(obj))
+ ret = -EIO;
+
+ return ret < 0 ? ret : 0;
+}
+
+/*
+ * i915_gem_object_has_unknown_state - Return true if the object backing pages are
+ * in an unknown_state. This means that userspace must NEVER be allowed to touch
+ * the pages, with either the GPU or CPU.
+ *
+ * ONLY valid to be called after ensuring that all kernel fences have signalled
+ * (in particular the fence for moving/clearing the object).
+ */
+bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj)
+{
+ /*
+ * The below barrier pairs with the dma_fence_signal() in
+ * __memcpy_work(). We should only sample the unknown_state after all
+ * the kernel fences have signalled.
+ */
+ smp_rmb();
+ return obj->mm.unknown_state;
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/huge_gem_object.c"
+#include "selftests/huge_pages.c"
+#include "selftests/i915_gem_migrate.c"
+#include "selftests/i915_gem_object.c"
+#include "selftests/i915_gem_coherency.c"
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
new file mode 100644
index 0000000000..f607b87890
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -0,0 +1,957 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_H__
+#define __I915_GEM_OBJECT_H__
+
+#include <drm/drm_gem.h>
+#include <drm/drm_file.h>
+#include <drm/drm_device.h>
+
+#include "display/intel_frontbuffer.h"
+#include "intel_memory_region.h"
+#include "i915_gem_object_types.h"
+#include "i915_gem_gtt.h"
+#include "i915_gem_ww.h"
+#include "i915_vma_types.h"
+
+enum intel_region_id;
+
+#define obj_to_i915(obj__) to_i915((obj__)->base.dev)
+
+static inline bool i915_gem_object_size_2big(u64 size)
+{
+ struct drm_i915_gem_object *obj;
+
+ if (overflows_type(size, obj->base.size))
+ return true;
+
+ return false;
+}
+
+unsigned int i915_gem_get_pat_index(struct drm_i915_private *i915,
+ enum i915_cache_level level);
+bool i915_gem_object_has_cache_level(const struct drm_i915_gem_object *obj,
+ enum i915_cache_level lvl);
+void i915_gem_init__objects(struct drm_i915_private *i915);
+
+void i915_objects_module_exit(void);
+int i915_objects_module_init(void);
+
+struct drm_i915_gem_object *i915_gem_object_alloc(void);
+void i915_gem_object_free(struct drm_i915_gem_object *obj);
+
+void i915_gem_object_init(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_object_ops *ops,
+ struct lock_class_key *key,
+ unsigned alloc_flags);
+
+void __i915_gem_object_fini(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915,
+ resource_size_t size);
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *i915,
+ const void *data, resource_size_t size);
+struct drm_i915_gem_object *
+__i915_gem_object_create_user(struct drm_i915_private *i915, u64 size,
+ struct intel_memory_region **placements,
+ unsigned int n_placements);
+
+extern const struct drm_i915_gem_object_ops i915_gem_shmem_ops;
+
+void __i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+ struct sg_table *pages,
+ bool needs_clflush);
+
+int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *args);
+int i915_gem_object_pread_phys(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *args);
+
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align);
+void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+void i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+
+void i915_gem_flush_free_objects(struct drm_i915_private *i915);
+
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj);
+
+/**
+ * i915_gem_object_lookup_rcu - look up a temporary GEM object from its handle
+ * @file: DRM file private data
+ * @handle: userspace handle
+ *
+ * Returns:
+ *
+ * A pointer to the object named by the handle if such exists on @file, NULL
+ * otherwise. This object is only valid whilst under the RCU read lock, and
+ * note carefully the object may be in the process of being destroyed.
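+ *
+ * Illustrative sketch of safe use, mirroring i915_gem_object_lookup()
+ * below, which converts the temporary pointer into a full reference:
+ *
+ *	rcu_read_lock();
+ *	obj = i915_gem_object_lookup_rcu(file, handle);
+ *	obj = i915_gem_object_get_rcu(obj);
+ *	rcu_read_unlock();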
+ */
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup_rcu(struct drm_file *file, u32 handle)
+{
+#ifdef CONFIG_LOCKDEP
+ WARN_ON(debug_locks && !lock_is_held(&rcu_lock_map));
+#endif
+ return idr_find(&file->object_idr, handle);
+}
+
+static inline struct drm_i915_gem_object *
+i915_gem_object_get_rcu(struct drm_i915_gem_object *obj)
+{
+ if (obj && !kref_get_unless_zero(&obj->base.refcount))
+ obj = NULL;
+
+ return obj;
+}
+
+static inline struct drm_i915_gem_object *
+i915_gem_object_lookup(struct drm_file *file, u32 handle)
+{
+ struct drm_i915_gem_object *obj;
+
+ rcu_read_lock();
+ obj = i915_gem_object_lookup_rcu(file, handle);
+ obj = i915_gem_object_get_rcu(obj);
+ rcu_read_unlock();
+
+ return obj;
+}
+
+__deprecated
+struct drm_gem_object *
+drm_gem_object_lookup(struct drm_file *file, u32 handle);
+
+__attribute__((nonnull))
+static inline struct drm_i915_gem_object *
+i915_gem_object_get(struct drm_i915_gem_object *obj)
+{
+ drm_gem_object_get(&obj->base);
+ return obj;
+}
+
+__attribute__((nonnull))
+static inline void
+i915_gem_object_put(struct drm_i915_gem_object *obj)
+{
+ __drm_gem_object_put(&obj->base);
+}
+
+#define assert_object_held(obj) dma_resv_assert_held((obj)->base.resv)
+
+/*
+ * If more than one potential simultaneous locker, assert held.
+ */
+static inline void assert_object_held_shared(const struct drm_i915_gem_object *obj)
+{
+ /*
+ * Note mm list lookup is protected by
+ * kref_get_unless_zero().
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP) &&
+ kref_read(&obj->base.refcount) > 0)
+ assert_object_held(obj);
+}
+
+static inline int __i915_gem_object_lock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ bool intr)
+{
+ int ret;
+
+ if (intr)
+ ret = dma_resv_lock_interruptible(obj->base.resv, ww ? &ww->ctx : NULL);
+ else
+ ret = dma_resv_lock(obj->base.resv, ww ? &ww->ctx : NULL);
+
+ if (!ret && ww) {
+ i915_gem_object_get(obj);
+ list_add_tail(&obj->obj_link, &ww->obj_list);
+ }
+ if (ret == -EALREADY)
+ ret = 0;
+
+ if (ret == -EDEADLK) {
+ i915_gem_object_get(obj);
+ ww->contended = obj;
+ }
+
+ return ret;
+}
+
+static inline int i915_gem_object_lock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
+{
+ return __i915_gem_object_lock(obj, ww, ww && ww->intr);
+}
+
+static inline int i915_gem_object_lock_interruptible(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
+{
+ WARN_ON(ww && !ww->intr);
+ return __i915_gem_object_lock(obj, ww, true);
+}
+
+static inline bool i915_gem_object_trylock(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww)
+{
+ if (!ww)
+ return dma_resv_trylock(obj->base.resv);
+ else
+ return ww_mutex_trylock(&obj->base.resv->lock, &ww->ctx);
+}
+
+static inline void i915_gem_object_unlock(struct drm_i915_gem_object *obj)
+{
+ if (obj->ops->adjust_lru)
+ obj->ops->adjust_lru(obj);
+
+ dma_resv_unlock(obj->base.resv);
+}
+
+static inline void
+i915_gem_object_set_readonly(struct drm_i915_gem_object *obj)
+{
+ obj->flags |= I915_BO_READONLY;
+}
+
+static inline bool
+i915_gem_object_is_readonly(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_READONLY;
+}
+
+static inline bool
+i915_gem_object_is_contiguous(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_ALLOC_CONTIGUOUS;
+}
+
+static inline bool
+i915_gem_object_is_volatile(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_ALLOC_VOLATILE;
+}
+
+static inline void
+i915_gem_object_set_volatile(struct drm_i915_gem_object *obj)
+{
+ obj->flags |= I915_BO_ALLOC_VOLATILE;
+}
+
+static inline bool
+i915_gem_object_has_tiling_quirk(struct drm_i915_gem_object *obj)
+{
+ return test_bit(I915_TILING_QUIRK_BIT, &obj->flags);
+}
+
+static inline void
+i915_gem_object_set_tiling_quirk(struct drm_i915_gem_object *obj)
+{
+ set_bit(I915_TILING_QUIRK_BIT, &obj->flags);
+}
+
+static inline void
+i915_gem_object_clear_tiling_quirk(struct drm_i915_gem_object *obj)
+{
+ clear_bit(I915_TILING_QUIRK_BIT, &obj->flags);
+}
+
+static inline bool
+i915_gem_object_is_protected(const struct drm_i915_gem_object *obj)
+{
+ return obj->flags & I915_BO_PROTECTED;
+}
+
+static inline bool
+i915_gem_object_type_has(const struct drm_i915_gem_object *obj,
+ unsigned long flags)
+{
+ return obj->ops->flags & flags;
+}
+
+bool i915_gem_object_has_struct_page(const struct drm_i915_gem_object *obj);
+
+bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj);
+
+static inline bool
+i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE);
+}
+
+static inline bool
+i915_gem_object_has_self_managed_shrink_list(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST);
+}
+
+static inline bool
+i915_gem_object_is_proxy(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_PROXY);
+}
+
+static inline bool
+i915_gem_object_never_mmap(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_type_has(obj, I915_GEM_OBJECT_NO_MMAP);
+}
+
+static inline bool
+i915_gem_object_is_framebuffer(const struct drm_i915_gem_object *obj)
+{
+ return READ_ONCE(obj->frontbuffer) || obj->is_dpt;
+}
+
+static inline unsigned int
+i915_gem_object_get_tiling(const struct drm_i915_gem_object *obj)
+{
+ return obj->tiling_and_stride & TILING_MASK;
+}
+
+static inline bool
+i915_gem_object_is_tiled(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_object_get_tiling(obj) != I915_TILING_NONE;
+}
+
+static inline unsigned int
+i915_gem_object_get_stride(const struct drm_i915_gem_object *obj)
+{
+ return obj->tiling_and_stride & STRIDE_MASK;
+}
+
+static inline unsigned int
+i915_gem_tile_height(unsigned int tiling)
+{
+ GEM_BUG_ON(!tiling);
+ return tiling == I915_TILING_Y ? 32 : 8;
+}
+
+static inline unsigned int
+i915_gem_object_get_tile_height(const struct drm_i915_gem_object *obj)
+{
+ return i915_gem_tile_height(i915_gem_object_get_tiling(obj));
+}
+
+static inline unsigned int
+i915_gem_object_get_tile_row_size(const struct drm_i915_gem_object *obj)
+{
+ return (i915_gem_object_get_stride(obj) *
+ i915_gem_object_get_tile_height(obj));
+}
+
+int i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+ unsigned int tiling, unsigned int stride);
+
+/**
+ * __i915_gem_object_page_iter_get_sg - helper to find the target scatterlist
+ * pointer and the target page position using pgoff_t n input argument and
+ * i915_gem_object_page_iter
+ * @obj: i915 GEM buffer object
+ * @iter: i915 GEM buffer object page iterator
+ * @n: page offset
+ * @offset: searched physical offset,
+ * it will be used for returning physical page offset value
+ *
+ * Context: Takes and releases the mutex lock of the i915_gem_object_page_iter.
+ * Takes and releases the RCU lock to search the radix_tree of
+ * i915_gem_object_page_iter.
+ *
+ * Returns:
+ * The target scatterlist pointer and the target page position.
+ *
+ * Recommended to use wrapper macro: i915_gem_object_page_iter_get_sg()
+ */
+struct scatterlist *
+__i915_gem_object_page_iter_get_sg(struct drm_i915_gem_object *obj,
+ struct i915_gem_object_page_iter *iter,
+ pgoff_t n,
+ unsigned int *offset);
+
+/**
+ * i915_gem_object_page_iter_get_sg - wrapper macro for
+ * __i915_gem_object_page_iter_get_sg()
+ * @obj: i915 GEM buffer object
+ * @it: i915 GEM buffer object page iterator
+ * @n: page offset
+ * @offset: searched physical offset,
+ * it will be used for returning physical page offset value
+ *
+ * Context: Takes and releases the mutex lock of the i915_gem_object_page_iter.
+ * Takes and releases the RCU lock to search the radix_tree of
+ * i915_gem_object_page_iter.
+ *
+ * Returns:
+ * The target scatterlist pointer and the target page position.
+ *
+ * In order to avoid the truncation of the input parameter, it checks the page
+ * offset n's type from the input parameter before calling
+ * __i915_gem_object_page_iter_get_sg().
+ */
+#define i915_gem_object_page_iter_get_sg(obj, it, n, offset) ({ \
+ static_assert(castable_to_type(n, pgoff_t)); \
+ __i915_gem_object_page_iter_get_sg(obj, it, n, offset); \
+})
+
+/**
+ * __i915_gem_object_get_sg - helper to find the target scatterlist
+ * pointer and the target page position using pgoff_t n input argument and
+ * drm_i915_gem_object. It uses an internal shmem scatterlist lookup function.
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ * @offset: searched physical offset,
+ * it will be used for returning physical page offset value
+ *
+ * It uses drm_i915_gem_object's internal shmem scatterlist lookup function as
+ * i915_gem_object_page_iter and calls __i915_gem_object_page_iter_get_sg().
+ *
+ * Returns:
+ * The target scatterlist pointer and the target page position.
+ *
+ * Recommended to use wrapper macro: i915_gem_object_get_sg()
+ * See also __i915_gem_object_page_iter_get_sg()
+ */
+static inline struct scatterlist *
+__i915_gem_object_get_sg(struct drm_i915_gem_object *obj, pgoff_t n,
+ unsigned int *offset)
+{
+ return __i915_gem_object_page_iter_get_sg(obj, &obj->mm.get_page, n, offset);
+}
+
+/**
+ * i915_gem_object_get_sg - wrapper macro for __i915_gem_object_get_sg()
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ * @offset: searched physical offset,
+ * it will be used for returning physical page offset value
+ *
+ * Returns:
+ * The target scatterlist pointer and the target page position.
+ *
+ * In order to avoid the truncation of the input parameter, it checks the page
+ * offset n's type from the input parameter before calling
+ * __i915_gem_object_get_sg().
+ * See also __i915_gem_object_page_iter_get_sg()
+ */
+#define i915_gem_object_get_sg(obj, n, offset) ({ \
+ static_assert(castable_to_type(n, pgoff_t)); \
+ __i915_gem_object_get_sg(obj, n, offset); \
+})
+
+/**
+ * __i915_gem_object_get_sg_dma - helper to find the target scatterlist
+ * pointer and the target page position using pgoff_t n input argument and
+ * drm_i915_gem_object. It uses an internal DMA mapped scatterlist lookup function
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ * @offset: searched physical offset,
+ * it will be used for returning physical page offset value
+ *
+ * It uses drm_i915_gem_object's internal DMA mapped scatterlist lookup function
+ * as i915_gem_object_page_iter and calls __i915_gem_object_page_iter_get_sg().
+ *
+ * Returns:
+ * The target scatterlist pointer and the target page position.
+ *
+ * Recommended to use wrapper macro: i915_gem_object_get_sg_dma()
+ * See also __i915_gem_object_page_iter_get_sg()
+ */
+static inline struct scatterlist *
+__i915_gem_object_get_sg_dma(struct drm_i915_gem_object *obj, pgoff_t n,
+ unsigned int *offset)
+{
+ return __i915_gem_object_page_iter_get_sg(obj, &obj->mm.get_dma_page, n, offset);
+}
+
+/**
+ * i915_gem_object_get_sg_dma - wrapper macro for __i915_gem_object_get_sg_dma()
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ * @offset: searched physical offset,
+ * it will be used for returning physical page offset value
+ *
+ * Returns:
+ * The target scatterlist pointer and the target page position.
+ *
+ * In order to avoid the truncation of the input parameter, it checks the page
+ * offset n's type from the input parameter before calling
+ * __i915_gem_object_get_sg_dma().
+ * See also __i915_gem_object_page_iter_get_sg()
+ */
+#define i915_gem_object_get_sg_dma(obj, n, offset) ({ \
+ static_assert(castable_to_type(n, pgoff_t)); \
+ __i915_gem_object_get_sg_dma(obj, n, offset); \
+})
+
+/**
+ * __i915_gem_object_get_page - helper to find the target page with a page offset
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ *
+ * It uses drm_i915_gem_object's internal shmem scatterlist lookup function as
+ * i915_gem_object_page_iter and calls __i915_gem_object_page_iter_get_sg()
+ * internally.
+ *
+ * Returns:
+ * The target page pointer.
+ *
+ * Recommended to use wrapper macro: i915_gem_object_get_page()
+ * See also __i915_gem_object_page_iter_get_sg()
+ */
+struct page *
+__i915_gem_object_get_page(struct drm_i915_gem_object *obj, pgoff_t n);
+
+/**
+ * i915_gem_object_get_page - wrapper macro for __i915_gem_object_get_page
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ *
+ * Returns:
+ * The target page pointer.
+ *
+ * In order to avoid the truncation of the input parameter, it checks the page
+ * offset n's type from the input parameter before calling
+ * __i915_gem_object_get_page().
+ * See also __i915_gem_object_page_iter_get_sg()
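+ *
+ * Illustrative sketch (assumes the object's pages are already pinned and
+ * n is a valid page offset; error handling elided):
+ *
+ *	struct page *page = i915_gem_object_get_page(obj, n);
+ *	void *vaddr = kmap_local_page(page);
+ *	...
+ *	kunmap_local(vaddr);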
+ */
+#define i915_gem_object_get_page(obj, n) ({ \
+ static_assert(castable_to_type(n, pgoff_t)); \
+ __i915_gem_object_get_page(obj, n); \
+})
+
+/**
+ * __i915_gem_object_get_dirty_page - helper to find the target page with a page
+ * offset
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ *
+ * It works like i915_gem_object_get_page(), but it marks the returned page dirty.
+ *
+ * Returns:
+ * The target page pointer.
+ *
+ * Recommended to use wrapper macro: i915_gem_object_get_dirty_page()
+ * See also __i915_gem_object_page_iter_get_sg() and __i915_gem_object_get_page()
+ */
+struct page *
+__i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, pgoff_t n);
+
+/**
+ * i915_gem_object_get_dirty_page - wrapper macro for __i915_gem_object_get_dirty_page
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ *
+ * Returns:
+ * The target page pointer.
+ *
+ * In order to avoid the truncation of the input parameter, it checks the page
+ * offset n's type from the input parameter before calling
+ * __i915_gem_object_get_dirty_page().
+ * See also __i915_gem_object_page_iter_get_sg() and __i915_gem_object_get_page()
+ */
+#define i915_gem_object_get_dirty_page(obj, n) ({ \
+ static_assert(castable_to_type(n, pgoff_t)); \
+ __i915_gem_object_get_dirty_page(obj, n); \
+})
+
+/**
+ * __i915_gem_object_get_dma_address_len - helper to get bus addresses of
+ * targeted DMA mapped scatterlist from i915 GEM buffer object and its length
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ * @len: DMA mapped scatterlist's DMA bus addresses length to return
+ *
+ * Returns:
+ * Bus addresses of targeted DMA mapped scatterlist
+ *
+ * Recommended to use wrapper macro: i915_gem_object_get_dma_address_len()
+ * See also __i915_gem_object_page_iter_get_sg() and __i915_gem_object_get_sg_dma()
+ */
+dma_addr_t
+__i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj, pgoff_t n,
+ unsigned int *len);
+
+/**
+ * i915_gem_object_get_dma_address_len - wrapper macro for
+ * __i915_gem_object_get_dma_address_len
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ * @len: DMA mapped scatterlist's DMA bus addresses length to return
+ *
+ * Returns:
+ * Bus addresses of targeted DMA mapped scatterlist
+ *
+ * In order to avoid the truncation of the input parameter, it checks the page
+ * offset n's type from the input parameter before calling
+ * __i915_gem_object_get_dma_address_len().
+ * See also __i915_gem_object_page_iter_get_sg() and
+ * __i915_gem_object_get_dma_address_len()
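+ *
+ * Illustrative sketch (n is an assumed page offset; here len receives the
+ * remaining contiguous length, in bytes, of the DMA segment at that page):
+ *
+ *	unsigned int len;
+ *	dma_addr_t addr = i915_gem_object_get_dma_address_len(obj, n, &len);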
+ */
+#define i915_gem_object_get_dma_address_len(obj, n, len) ({ \
+ static_assert(castable_to_type(n, pgoff_t)); \
+ __i915_gem_object_get_dma_address_len(obj, n, len); \
+})
+
+/**
+ * __i915_gem_object_get_dma_address - helper to get bus addresses of
+ * targeted DMA mapped scatterlist from i915 GEM buffer object
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ *
+ * Returns:
+ * Bus addresses of targeted DMA mapped scatterlist
+ *
+ * Recommended to use wrapper macro: i915_gem_object_get_dma_address()
+ * See also __i915_gem_object_page_iter_get_sg() and __i915_gem_object_get_sg_dma()
+ */
+dma_addr_t
+__i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, pgoff_t n);
+
+/**
+ * i915_gem_object_get_dma_address - wrapper macro for
+ * __i915_gem_object_get_dma_address
+ * @obj: i915 GEM buffer object
+ * @n: page offset
+ *
+ * Returns:
+ * Bus addresses of targeted DMA mapped scatterlist
+ *
+ * In order to avoid the truncation of the input parameter, it checks the page
+ * offset n's type from the input parameter before calling
+ * __i915_gem_object_get_dma_address().
+ * See also __i915_gem_object_page_iter_get_sg() and
+ * __i915_gem_object_get_dma_address()
+ */
+#define i915_gem_object_get_dma_address(obj, n) ({ \
+ static_assert(castable_to_type(n, pgoff_t)); \
+ __i915_gem_object_get_dma_address(obj, n); \
+})
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+
+static inline int __must_check
+i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+ assert_object_held(obj);
+
+ if (atomic_inc_not_zero(&obj->mm.pages_pin_count))
+ return 0;
+
+ return __i915_gem_object_get_pages(obj);
+}
+
+int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj);
+
+static inline bool
+i915_gem_object_has_pages(struct drm_i915_gem_object *obj)
+{
+ return !IS_ERR_OR_NULL(READ_ONCE(obj->mm.pages));
+}
+
+static inline void
+__i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+ atomic_inc(&obj->mm.pages_pin_count);
+}
+
+static inline bool
+i915_gem_object_has_pinned_pages(struct drm_i915_gem_object *obj)
+{
+ return atomic_read(&obj->mm.pages_pin_count);
+}
+
+static inline void
+__i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+ atomic_dec(&obj->mm.pages_pin_count);
+}
+
+static inline void
+i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
+{
+ __i915_gem_object_unpin_pages(obj);
+}
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj);
+
+/**
+ * i915_gem_object_pin_map - return a contiguous mapping of the entire object
+ * @obj: the object to map into kernel address space
+ * @type: the type of mapping, used to select pgprot_t
+ *
+ * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
+ * pages and then returns a contiguous mapping of the backing storage into
+ * the kernel address space. Based on the @type of mapping, the PTE will be
+ * set to either WriteBack or WriteCombine (via pgprot_t).
+ *
+ * The caller is responsible for calling i915_gem_object_unpin_map() when the
+ * mapping is no longer required.
+ *
+ * Returns the pointer through which to access the mapped object, or an
+ * ERR_PTR() on error.
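+ *
+ * A minimal usage sketch (illustrative only; it uses the _unlocked
+ * variant, which takes the object lock internally, and assumes data and
+ * size are caller state):
+ *
+ *	void *vaddr;
+ *
+ *	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ *	if (IS_ERR(vaddr))
+ *		return PTR_ERR(vaddr);
+ *	memcpy(vaddr, data, size);
+ *	i915_gem_object_flush_map(obj);
+ *	i915_gem_object_unpin_map(obj);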
+ */
+void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+ enum i915_map_type type);
+
+void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
+ enum i915_map_type type);
+
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+ unsigned long offset,
+ unsigned long size);
+static inline void i915_gem_object_flush_map(struct drm_i915_gem_object *obj)
+{
+ __i915_gem_object_flush_map(obj, 0, obj->base.size);
+}
+
+/**
+ * i915_gem_object_unpin_map - releases an earlier mapping
+ * @obj: the object to unmap
+ *
+ * After pinning the object and mapping its pages, once you are finished
+ * with your access, call i915_gem_object_unpin_map() to release the pin
+ * upon the mapping. Once the pin count reaches zero, that mapping may be
+ * removed.
+ */
+static inline void i915_gem_object_unpin_map(struct drm_i915_gem_object *obj)
+{
+ i915_gem_object_unpin_pages(obj);
+}
+
+void __i915_gem_object_release_map(struct drm_i915_gem_object *obj);
+
+int i915_gem_object_prepare_read(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush);
+int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
+ unsigned int *needs_clflush);
+#define CLFLUSH_BEFORE BIT(0)
+#define CLFLUSH_AFTER BIT(1)
+#define CLFLUSH_FLAGS (CLFLUSH_BEFORE | CLFLUSH_AFTER)
+
+static inline void
+i915_gem_object_finish_access(struct drm_i915_gem_object *obj)
+{
+ i915_gem_object_unpin_pages(obj);
+}
+
+int i915_gem_object_get_moving_fence(struct drm_i915_gem_object *obj,
+ struct dma_fence **fence);
+int i915_gem_object_wait_moving_fence(struct drm_i915_gem_object *obj,
+ bool intr);
+bool i915_gem_object_has_unknown_state(struct drm_i915_gem_object *obj);
+
+void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
+ unsigned int cache_level);
+void i915_gem_object_set_pat_index(struct drm_i915_gem_object *obj,
+ unsigned int pat_index);
+bool i915_gem_object_can_bypass_llc(struct drm_i915_gem_object *obj);
+void i915_gem_object_flush_if_display(struct drm_i915_gem_object *obj);
+void i915_gem_object_flush_if_display_locked(struct drm_i915_gem_object *obj);
+bool i915_gem_cpu_write_needs_clflush(struct drm_i915_gem_object *obj);
+
+int __must_check
+i915_gem_object_set_to_wc_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write);
+int __must_check
+i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write);
+struct i915_vma * __must_check
+i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ u32 alignment,
+ const struct i915_gtt_view *view,
+ unsigned int flags);
+
+void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj);
+void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj);
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
+void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj);
+
+static inline void __start_cpu_write(struct drm_i915_gem_object *obj)
+{
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ if (i915_gem_cpu_write_needs_clflush(obj))
+ obj->cache_dirty = true;
+}
+
+void i915_gem_fence_wait_priority(struct dma_fence *fence,
+ const struct i915_sched_attr *attr);
+
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+ unsigned int flags,
+ long timeout);
+int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+ unsigned int flags,
+ const struct i915_sched_attr *attr);
+
+void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin);
+void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin);
+
+static inline void
+i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ if (unlikely(rcu_access_pointer(obj->frontbuffer)))
+ __i915_gem_object_flush_frontbuffer(obj, origin);
+}
+
+static inline void
+i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ if (unlikely(rcu_access_pointer(obj->frontbuffer)))
+ __i915_gem_object_invalidate_frontbuffer(obj, origin);
+}
+
+int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset, void *dst, int size);
+
+bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj);
+
+void __i915_gem_free_object_rcu(struct rcu_head *head);
+
+void __i915_gem_object_pages_fini(struct drm_i915_gem_object *obj);
+
+void __i915_gem_free_object(struct drm_i915_gem_object *obj);
+
+bool i915_gem_object_evictable(struct drm_i915_gem_object *obj);
+
+bool i915_gem_object_migratable(struct drm_i915_gem_object *obj);
+
+int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ enum intel_region_id id);
+int __i915_gem_object_migrate(struct drm_i915_gem_object *obj,
+ struct i915_gem_ww_ctx *ww,
+ enum intel_region_id id,
+ unsigned int flags);
+
+bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj,
+ enum intel_region_id id);
+
+int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
+ unsigned int flags);
+
+bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
+ enum intel_memory_type type);
+
+bool i915_gem_object_needs_ccs_pages(struct drm_i915_gem_object *obj);
+
+int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ size_t size, struct intel_memory_region *mr,
+ struct address_space *mapping,
+ unsigned int max_segment);
+void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup);
+void __shmem_writeback(size_t size, struct address_space *mapping);
+
+#ifdef CONFIG_MMU_NOTIFIER
+static inline bool
+i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
+{
+ return obj->userptr.notifier.mm;
+}
+
+int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj);
+int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj);
+int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj);
+#else
+static inline bool i915_gem_object_is_userptr(struct drm_i915_gem_object *obj) { return false; }
+
+static inline int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
+static inline int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
+static inline int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj) { GEM_BUG_ON(1); return -ENODEV; }
+
+#endif
+
+/**
+ * i915_gem_object_get_frontbuffer - Get the object's frontbuffer
+ * @obj: The object whose frontbuffer to get.
+ *
+ * Get a pointer to the object's frontbuffer, if one exists. Note that an RCU
+ * mechanism is used to handle e.g. the ongoing removal of the frontbuffer
+ * pointer.
+ *
+ * Return: pointer to the object's frontbuffer if such exists, or NULL.
+ */
+static inline struct intel_frontbuffer *
+i915_gem_object_get_frontbuffer(const struct drm_i915_gem_object *obj)
+{
+ struct intel_frontbuffer *front;
+
+ if (likely(!rcu_access_pointer(obj->frontbuffer)))
+ return NULL;
+
+ rcu_read_lock();
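+ /*
+ * Standard RCU + kref pattern: only return a frontbuffer whose
+ * reference we managed to take while it was still the pointer
+ * published in obj->frontbuffer; otherwise drop it and retry.
+ */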
+ do {
+ front = rcu_dereference(obj->frontbuffer);
+ if (!front)
+ break;
+
+ if (unlikely(!kref_get_unless_zero(&front->ref)))
+ continue;
+
+ if (likely(front == rcu_access_pointer(obj->frontbuffer)))
+ break;
+
+ intel_frontbuffer_put(front);
+ } while (1);
+ rcu_read_unlock();
+
+ return front;
+}
+
+/**
+ * i915_gem_object_set_frontbuffer - Set the object's frontbuffer
+ * @obj: The object whose frontbuffer to set.
+ * @front: The frontbuffer to set
+ *
+ * Set the object's frontbuffer pointer. If a frontbuffer is already set for
+ * the object, keep it and return its pointer to the caller. Note that an RCU
+ * mechanism is used to handle e.g. the ongoing removal of the frontbuffer
+ * pointer. This function is protected by i915->display.fb_tracking.lock.
+ *
+ * Return: pointer to frontbuffer which was set.
+ */
+static inline struct intel_frontbuffer *
+i915_gem_object_set_frontbuffer(struct drm_i915_gem_object *obj,
+ struct intel_frontbuffer *front)
+{
+ struct intel_frontbuffer *cur = front;
+
+ if (!front) {
+ RCU_INIT_POINTER(obj->frontbuffer, NULL);
+ } else if (rcu_access_pointer(obj->frontbuffer)) {
+ cur = rcu_dereference_protected(obj->frontbuffer, true);
+ kref_get(&cur->ref);
+ } else {
+ drm_gem_object_get(intel_bo_to_drm_bo(obj));
+ rcu_assign_pointer(obj->frontbuffer, front);
+ }
+
+ return cur;
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
new file mode 100644
index 0000000000..2292404007
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -0,0 +1,735 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __I915_GEM_OBJECT_TYPES_H__
+#define __I915_GEM_OBJECT_TYPES_H__
+
+#include <linux/mmu_notifier.h>
+
+#include <drm/drm_gem.h>
+#include <drm/ttm/ttm_bo.h>
+#include <uapi/drm/i915_drm.h>
+
+#include "i915_active.h"
+#include "i915_selftest.h"
+#include "i915_vma_resource.h"
+
+#include "gt/intel_gt_defines.h"
+
+struct drm_i915_gem_object;
+struct intel_frontbuffer;
+struct intel_memory_region;
+
+/*
+ * struct i915_lut_handle tracks the fast lookups from handle to vma used
+ * for execbuf. Although we use a radixtree for that mapping, in order to
+ * remove them as the object or context is closed, we need a secondary list
+ * and a translation entry (i915_lut_handle).
+ */
+struct i915_lut_handle {
+ struct list_head obj_link;
+ struct i915_gem_context *ctx;
+ u32 handle;
+};
+
+struct drm_i915_gem_object_ops {
+ unsigned int flags;
+#define I915_GEM_OBJECT_IS_SHRINKABLE BIT(1)
+/* Skip the shrinker management in set_pages/unset_pages */
+#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST BIT(2)
+#define I915_GEM_OBJECT_IS_PROXY BIT(3)
+#define I915_GEM_OBJECT_NO_MMAP BIT(4)
+
+ /* Interface between the GEM object and its backing storage.
+ * get_pages() is called once prior to the use of the associated set
+ * of pages, before binding them into the GTT, and put_pages() is
+ * called after we no longer need them. As we expect there to be an
+ * associated cost with migrating pages between the backing storage
+ * and making them available for the GPU (e.g. clflush), we may hold
+ * onto the pages after they are no longer referenced by the GPU
+ * in case they may be used again shortly (for example migrating the
+ * pages to a different memory domain within the GTT). put_pages()
+ * will therefore most likely be called when the object itself is
+ * being released or under memory pressure (where we attempt to
+ * reap pages for the shrinker).
+ */
+ int (*get_pages)(struct drm_i915_gem_object *obj);
+ void (*put_pages)(struct drm_i915_gem_object *obj,
+ struct sg_table *pages);
+ int (*truncate)(struct drm_i915_gem_object *obj);
+ /**
+ * shrink - Perform further backend specific actions to facilitate
+ * shrinking.
+ * @obj: The gem object
+ * @flags: Extra flags to control shrinking behaviour in the backend
+ *
+ * Possible values for @flags:
+ *
+ * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the
+ * backing pages, if supported.
+ *
+ * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to
+ * idle. Active objects can be considered later. The TTM backend for
+ * example might have async migrations going on, which don't use any
+ * i915_vma to track the active GTT binding, and hence having an unbound
+ * object might not be enough.
+ */
+#define I915_GEM_OBJECT_SHRINK_WRITEBACK BIT(0)
+#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT BIT(1)
+ int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags);
+
+ int (*pread)(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *arg);
+ int (*pwrite)(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *arg);
+ u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
+ void (*unmap_virtual)(struct drm_i915_gem_object *obj);
+
+ int (*dmabuf_export)(struct drm_i915_gem_object *obj);
+
+ /**
+ * adjust_lru - notify that the madvise value was updated
+ * @obj: The gem object
+ *
+ * The madvise value may have been updated, or the object was recently
+ * referenced, so act accordingly (perhaps changing an LRU list etc).
+ */
+ void (*adjust_lru)(struct drm_i915_gem_object *obj);
+
+ /**
+ * delayed_free - Override the default delayed free implementation
+ */
+ void (*delayed_free)(struct drm_i915_gem_object *obj);
+
+ /**
+ * migrate - Migrate object to a different region either for
+ * pinning or for as long as the object lock is held.
+ */
+ int (*migrate)(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mr,
+ unsigned int flags);
+
+ void (*release)(struct drm_i915_gem_object *obj);
+
+ const struct vm_operations_struct *mmap_ops;
+ const char *name; /* friendly name for debug, e.g. lockdep classes */
+};
+
+/**
+ * enum i915_cache_level - The supported GTT caching values for system memory
+ * pages.
+ *
+ * These translate to some special GTT PTE bits when binding pages into some
+ * address space. It also determines whether an object, or rather its pages are
+ * coherent with the GPU, when also reading or writing through the CPU cache
+ * with those pages.
+ *
+ * Userspace can also control this through struct drm_i915_gem_caching.
+ */
+enum i915_cache_level {
+ /**
+ * @I915_CACHE_NONE:
+ *
+ * GPU access is not coherent with the CPU cache. If the cache is dirty
+ * and we need the underlying pages to be coherent with some later GPU
+ * access then we need to manually flush the pages.
+ *
+ * On shared LLC platforms reads and writes through the CPU cache are
+ * still coherent even with this setting. See also
+ * &drm_i915_gem_object.cache_coherent for more details. Due to this we
+ * should only ever use uncached for scanout surfaces, otherwise we end
+ * up over-flushing in some places.
+ *
+ * This is the default on non-LLC platforms.
+ */
+ I915_CACHE_NONE = 0,
+ /**
+ * @I915_CACHE_LLC:
+ *
+ * GPU access is coherent with the CPU cache. If the cache is dirty,
+ * then the GPU will ensure that access remains coherent, when both
+ * reading and writing through the CPU cache. GPU writes can dirty the
+ * CPU cache.
+ *
+ * Not used for scanout surfaces.
+ *
+ * Applies to both platforms with shared LLC(HAS_LLC), and snooping
+ * based platforms(HAS_SNOOP).
+ *
+ * This is the default on shared LLC platforms. The only exception is
+ * scanout objects, where the display engine is not coherent with the
+ * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
+ * automatically applied by the kernel in pin_for_display, if userspace
+ * has not done so already.
+ */
+ I915_CACHE_LLC,
+ /**
+ * @I915_CACHE_L3_LLC:
+ *
+ * Explicitly enable the Gfx L3 cache, with coherent LLC.
+ *
+ * The Gfx L3 sits between the domain specific caches, e.g
+ * sampler/render caches, and the larger LLC. LLC is coherent with the
+ * GPU, but L3 is only visible to the GPU, so likely needs to be flushed
+ * when the workload completes.
+ *
+ * Not used for scanout surfaces.
+ *
+ * Only exposed on some gen7 + GGTT. More recent hardware has dropped
+ * this explicit setting, where it should now be enabled by default.
+ */
+ I915_CACHE_L3_LLC,
+ /**
+ * @I915_CACHE_WT:
+ *
+ * Write-through. Used for scanout surfaces.
+ *
+ * The GPU can utilise the caches, while still having the display engine
+ * be coherent with GPU writes, as a result we don't need to flush the
+ * CPU caches when moving out of the render domain. This is the default
+ * setting chosen by the kernel, if supported by the HW, otherwise we
+ * fallback to I915_CACHE_NONE. On the CPU side writes through the CPU
+ * cache still need to be flushed, to remain coherent with the display
+ * engine.
+ */
+ I915_CACHE_WT,
+ /**
+ * @I915_MAX_CACHE_LEVEL:
+ *
+ * Mark the last entry in the enum. Used for defining cachelevel_to_pat
+ * array for cache_level to pat translation table.
+ */
+ I915_MAX_CACHE_LEVEL,
+};
+
+enum i915_map_type {
+ I915_MAP_WB = 0,
+ I915_MAP_WC,
+#define I915_MAP_OVERRIDE BIT(31)
+ I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
+ I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
+};
+
+enum i915_mmap_type {
+ I915_MMAP_TYPE_GTT = 0,
+ I915_MMAP_TYPE_WC,
+ I915_MMAP_TYPE_WB,
+ I915_MMAP_TYPE_UC,
+ I915_MMAP_TYPE_FIXED,
+};
+
+struct i915_mmap_offset {
+ struct drm_vma_offset_node vma_node;
+ struct drm_i915_gem_object *obj;
+ enum i915_mmap_type mmap_type;
+
+ struct rb_node offset;
+};
+
+struct i915_gem_object_page_iter {
+ struct scatterlist *sg_pos;
+ unsigned int sg_idx; /* in pages, but 32bit eek! */
+
+ struct radix_tree_root radix;
+ struct mutex lock; /* protects this cache */
+};
+
+struct drm_i915_gem_object {
+ /*
+ * We might have reason to revisit the below since it wastes
+ * a lot of space for non-ttm gem objects.
+ * In any case, always use the accessors for the ttm_buffer_object
+ * when accessing it.
+ */
+ union {
+ struct drm_gem_object base;
+ struct ttm_buffer_object __do_not_access;
+ };
+
+ const struct drm_i915_gem_object_ops *ops;
+
+ struct {
+ /**
+ * @vma.lock: protect the list/tree of vmas
+ */
+ spinlock_t lock;
+
+ /**
+ * @vma.list: List of VMAs backed by this object
+ *
+ * The VMAs on this list are ordered by type, all GGTT vma are
+ * placed at the head and all ppGTT vma are placed at the tail.
+ * The different types of GGTT vma are unordered between
+ * themselves, use the @vma.tree (which has a defined order
+ * between all VMA) to quickly find an exact match.
+ */
+ struct list_head list;
+
+ /**
+ * @vma.tree: Ordered tree of VMAs backed by this object
+ *
+ * All VMA created for this object are placed in the @vma.tree
+ * for fast retrieval via a binary search in
+ * i915_vma_instance(). They are also added to @vma.list for
+ * easy iteration.
+ */
+ struct rb_root tree;
+ } vma;
+
+ /**
+ * @lut_list: List of vma lookup entries in use for this object.
+ *
+ * If this object is closed, we need to remove all of its VMA from
+ * the fast lookup index in associated contexts; @lut_list provides
+ * this translation from object to context->handles_vma.
+ */
+ struct list_head lut_list;
+ spinlock_t lut_lock; /* guards lut_list */
+
+ /**
+ * @obj_link: Link into @i915_gem_ww_ctx.obj_list
+ *
+ * When we lock this object through i915_gem_object_lock() with a
+ * context, we add it to the list to ensure we can unlock everything
+ * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
+ */
+ struct list_head obj_link;
+ /**
+ * @shares_resv_from: The object shares the resv from this vm.
+ */
+ struct i915_address_space *shares_resv_from;
+
+ union {
+ struct rcu_head rcu;
+ struct llist_node freed;
+ };
+
+ /**
+ * Whether the object is currently in the GGTT or any other supported
+ * fake offset mmap backed by lmem.
+ */
+ unsigned int userfault_count;
+ struct list_head userfault_link;
+
+ struct {
+ spinlock_t lock; /* Protects access to mmo offsets */
+ struct rb_root offsets;
+ } mmo;
+
+ I915_SELFTEST_DECLARE(struct list_head st_link);
+
+ unsigned long flags;
+#define I915_BO_ALLOC_CONTIGUOUS BIT(0)
+#define I915_BO_ALLOC_VOLATILE BIT(1)
+#define I915_BO_ALLOC_CPU_CLEAR BIT(2)
+#define I915_BO_ALLOC_USER BIT(3)
+/* Object is allowed to lose its contents on suspend / resume, even if pinned */
+#define I915_BO_ALLOC_PM_VOLATILE BIT(4)
+/* Object needs to be restored early using memcpy during resume */
+#define I915_BO_ALLOC_PM_EARLY BIT(5)
+/*
+ * Object is likely never accessed by the CPU. This will prioritise the BO to be
+ * allocated in the non-mappable portion of lmem. This is merely a hint, and if
+ * dealing with userspace objects the CPU fault handler is free to ignore this.
+ */
+#define I915_BO_ALLOC_GPU_ONLY BIT(6)
+#define I915_BO_ALLOC_CCS_AUX BIT(7)
+/*
+ * Object is allowed to retain its initial data and will not be cleared on first
+ * access if used along with I915_BO_ALLOC_USER. This is mainly to keep
+ * preallocated framebuffer data intact while transitioning it to i915drmfb.
+ */
+#define I915_BO_PREALLOC BIT(8)
+#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
+ I915_BO_ALLOC_VOLATILE | \
+ I915_BO_ALLOC_CPU_CLEAR | \
+ I915_BO_ALLOC_USER | \
+ I915_BO_ALLOC_PM_VOLATILE | \
+ I915_BO_ALLOC_PM_EARLY | \
+ I915_BO_ALLOC_GPU_ONLY | \
+ I915_BO_ALLOC_CCS_AUX | \
+ I915_BO_PREALLOC)
+#define I915_BO_READONLY BIT(9)
+#define I915_TILING_QUIRK_BIT 10 /* unknown swizzling; do not release! */
+#define I915_BO_PROTECTED BIT(11)
+ /**
+ * @mem_flags - Mutable placement-related flags
+ *
+ * These are flags that indicate specifics of the memory region
+ * the object is currently in. As such they are only stable
+ * either under the object lock or if the object is pinned.
+ */
+ unsigned int mem_flags;
+#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
+#define I915_BO_FLAG_IOMEM BIT(1) /* Object backed by IO memory */
+ /**
+ * @pat_index: The desired PAT index.
+ *
+ * See hardware specification for valid PAT indices for each platform.
+ * This field replaces the @cache_level that contains a value of enum
+ * i915_cache_level since PAT indices are being used by both userspace
+ * and kernel mode driver for caching policy control after GEN12.
+ * In the meantime platform specific tables are created to translate
+ * i915_cache_level into a pat index; for more details check the macros
+ * defined in i915/i915_pci.c, e.g. PVC_CACHELEVEL.
+ * For backward compatibility, this field contains values that exactly
+ * match the entries of enum i915_cache_level for pre-GEN12 platforms (See
+ * LEGACY_CACHELEVEL), so that the PTE encode functions for these
+ * legacy platforms can stay the same.
+ */
+ unsigned int pat_index:6;
+ /**
+ * @pat_set_by_user: Indicate whether pat_index is set by user space
+ *
+ * This field is set to false by default, only set to true if the
+ * pat_index is set by user space. By design, user space is capable of
+ * managing caching behavior by setting pat_index, in which case this
+ * kernel mode driver should never touch the pat_index.
+ */
+ unsigned int pat_set_by_user:1;
+ /**
+ * @cache_coherent:
+ *
+ * Note: with the change above which replaced @cache_level with pat_index,
+ * the use of @cache_coherent is limited to the objects created by kernel
+ * or by userspace without pat index specified.
+ * Check for @pat_set_by_user to find out if an object has pat index set
+ * by userspace. The ioctl's to change cache settings have also been
+ * disabled for the objects with pat index set by userspace. Please don't
+ * assume @cache_coherent has the flags set as described here. A helper
+ * function i915_gem_object_has_cache_level() provides one way to bypass
+ * the use of this field.
+ *
+ * Track whether the pages are coherent with the GPU if reading or
+ * writing through the CPU caches. This largely depends on the
+ * @cache_level setting.
+ *
+ * On platforms which don't have the shared LLC(HAS_SNOOP), like on Atom
+ * platforms, coherency must be explicitly requested with some special
+ * GTT caching bits(see enum i915_cache_level). When enabling coherency
+ * it does come at a performance and power cost on such platforms. On
+ * the flip side the kernel does not need to manually flush any buffers
+ * which need to be coherent with the GPU, if the object is not coherent
+ * i.e @cache_coherent is zero.
+ *
+ * On platforms that share the LLC with the CPU(HAS_LLC), all GT memory
+ * access will automatically snoop the CPU caches(even with CACHE_NONE).
+ * The one exception is when dealing with the display engine, like with
+ * scanout surfaces. To handle this the kernel will always flush the
+ * surface out of the CPU caches when preparing it for scanout. Also
+ * note that since scanout surfaces are only ever read by the display
+ * engine we only need to care about flushing any writes through the CPU
+ * cache, reads on the other hand will always be coherent.
+ *
+ * Something strange here is why @cache_coherent is not a simple
+ * boolean, i.e coherent vs non-coherent. The reasoning for this comes back
+ * to the display engine not being fully coherent. As a result scanout
+ * surfaces will either be marked as I915_CACHE_NONE or I915_CACHE_WT.
+ * In the case of seeing I915_CACHE_NONE the kernel makes the assumption
+ * that this is likely a scanout surface, and will set @cache_coherent
+ * as only I915_BO_CACHE_COHERENT_FOR_READ, on platforms with the shared
+ * LLC. The kernel uses this to always flush writes through the CPU
+ * cache as early as possible, where it can, in effect keeping
+ * @cache_dirty clean, so we can potentially avoid stalling when
+ * flushing the surface just before doing the scanout. This does mean
+ * we might unnecessarily flush non-scanout objects in some places, but
+ * the default assumption is that all normal objects should be using
+ * I915_CACHE_LLC, at least on platforms with the shared LLC.
+ *
+ * Supported values:
+ *
+ * I915_BO_CACHE_COHERENT_FOR_READ:
+ *
+ * On shared LLC platforms, we use this for special scanout surfaces,
+ * where the display engine is not coherent with the CPU cache. As such
+ * we need to ensure we flush any writes before doing the scanout. As an
+ * optimisation we try to flush any writes as early as possible to avoid
+ * stalling later.
+ *
+ * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC
+ * platforms, we use:
+ *
+ * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ
+ *
+ * While for normal objects that are fully coherent, including special
+ * scanout surfaces marked as I915_CACHE_WT, we use:
+ *
+ * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
+ * I915_BO_CACHE_COHERENT_FOR_WRITE
+ *
+ * And then for objects that are not coherent at all we use:
+ *
+ * cache_coherent = 0
+ *
+ * I915_BO_CACHE_COHERENT_FOR_WRITE:
+ *
+ * When writing through the CPU cache, the GPU is still coherent. Note
+ * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
+ */
+#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
+#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
+ unsigned int cache_coherent:2;
+
+ /**
+ * @cache_dirty:
+ *
+ * Note: with the change above which replaced cache_level with pat_index,
+ * the use of @cache_dirty is limited to the objects created by kernel
+ * or by userspace without pat index specified.
+ * Check for @pat_set_by_user to find out if an object has pat index set
+ * by userspace. The ioctl's to change cache settings have also been
+ * disabled for the objects with pat_index set by userspace. Please don't
+ * assume @cache_dirty is set as described here. Also see the helper function
+ * i915_gem_object_has_cache_level() for possible ways to bypass the use
+ * of this field.
+ *
+ * Track if we are dirty with writes through the CPU cache for this
+ * object. As a result reading directly from main memory might yield
+ * stale data.
+ *
+ * This also ties into whether the kernel is tracking the object as
+ * coherent with the GPU, as per @cache_coherent, as it determines if
+ * flushing might be needed at various points.
+ *
+ * Another part of @cache_dirty is managing flushing when first
+ * acquiring the pages for system memory, at this point the pages are
+ * considered foreign, so the default assumption is that the cache is
+ * dirty, for example the page zeroing done by the kernel might leave
+ * writes through the CPU cache, or swapping-in, while the actual data in
+ * main memory is potentially stale. Note that this is a potential
+ * security issue when dealing with userspace objects and zeroing. Now,
+ * whether we actually need to apply the big sledgehammer of flushing all
+ * the pages on acquire depends on if @cache_coherent is marked as
+ * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e that the GPU will be coherent
+ * for both reads and writes through the CPU cache.
+ *
+ * Note that on shared LLC platforms we still apply the heavy flush for
+ * I915_CACHE_NONE objects, under the assumption that this is going to
+ * be used for scanout.
+ *
+ * Update: On some hardware there is now also the 'Bypass LLC' MOCS
+ * entry, which defeats our @cache_coherent tracking, since userspace
+ * can freely bypass the CPU cache when touching the pages with the GPU,
+ * where the kernel is completely unaware. On such platforms we need to
+ * apply the sledgehammer-on-acquire regardless of the @cache_coherent.
+ *
+ * Special care is taken on non-LLC platforms, to prevent potential
+ * information leak. The driver currently ensures:
+ *
+ * 1. All userspace objects, by default, have @cache_level set as
+ * I915_CACHE_NONE. The only exception is userptr objects, where we
+ * instead force I915_CACHE_LLC, but we also don't allow userspace to
+ * ever change the @cache_level for such objects. Another special case
+ * is dma-buf, which doesn't rely on @cache_dirty, but there we
+ * always do a forced flush when acquiring the pages, if there is a
+ * chance that the pages can be read directly from main memory with
+ * the GPU.
+ *
+ * 2. All I915_CACHE_NONE objects have @cache_dirty initially true.
+ *
+ * 3. All swapped-out objects(i.e shmem) have @cache_dirty set to
+ * true.
+ *
+ * 4. The @cache_dirty is never freely reset before the initial
+ * flush, even if userspace adjusts the @cache_level through the
+ * i915_gem_set_caching_ioctl.
+ *
+ * 5. All @cache_dirty objects(including swapped-in) are initially
+ * flushed with a synchronous call to drm_clflush_sg in
+ * __i915_gem_object_set_pages. The @cache_dirty can be freely reset
+ * at this point. All further asynchronous clflushes are never security
+ * critical, i.e userspace is free to race against itself.
+ */
+ unsigned int cache_dirty:1;
+
+ /* @is_dpt: Object houses a display page table (DPT) */
+ unsigned int is_dpt:1;
+
+ /**
+ * @read_domains: Read memory domains.
+ *
+ * These monitor which caches contain read/write data related to the
+ * object. When transitioning from one set of domains to another,
+ * the driver is called to ensure that caches are suitably flushed and
+ * invalidated.
+ */
+ u16 read_domains;
+
+ /**
+ * @write_domain: Corresponding unique write memory domain.
+ */
+ u16 write_domain;
+
+ struct intel_frontbuffer __rcu *frontbuffer;
+
+ /** Current tiling stride for the object, if it's tiled. */
+ unsigned int tiling_and_stride;
+#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
+#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
+#define STRIDE_MASK (~TILING_MASK)
+
+ struct {
+ /*
+ * Protects the pages and their use. Do not use directly, but
+ * instead go through the pin/unpin interfaces.
+ */
+ atomic_t pages_pin_count;
+
+ /**
+ * @shrink_pin: Prevents the pages from being made visible to
+ * the shrinker, while the shrink_pin is non-zero. Most users
+ * should pretty much never have to care about this, outside of
+ * some special use cases.
+ *
+ * By default most objects will start out as visible to the
+ * shrinker(if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
+ * backing pages are attached to the object, like in
+ * __i915_gem_object_set_pages(). They will then be removed from the
+ * shrinker list once the pages are released.
+ *
+ * The @shrink_pin is incremented by calling
+ * i915_gem_object_make_unshrinkable(), which will also remove
+ * the object from the shrinker list, if the pin count was zero.
+ *
+ * Callers will then typically call
+ * i915_gem_object_make_shrinkable() or
+ * i915_gem_object_make_purgeable() to decrement the pin count,
+ * and make the pages visible again.
+ */
+ atomic_t shrink_pin;
+
+ /**
+ * @ttm_shrinkable: True when the object is using shmem pages
+ * underneath. Protected by the object lock.
+ */
+ bool ttm_shrinkable;
+
+ /**
+ * @unknown_state: Indicate that the object is effectively
+ * borked. This is write-once and set if we somehow encounter a
+ * fatal error when moving/clearing the pages, and we are not
+ * able to fallback to memcpy/memset, like on small-BAR systems.
+ * The GPU should also be wedged (or in the process) at this
+ * point.
+ *
+ * Only valid to read this after acquiring the dma-resv lock and
+ * waiting for all DMA_RESV_USAGE_KERNEL fences to be signalled,
+ * or if we otherwise know that the moving fence has signalled,
+ * and we are certain the pages underneath are valid for
+ * immediate access (under normal operation), like just prior to
+ * binding the object or when setting up the CPU fault handler.
+ * See i915_gem_object_has_unknown_state();
+ */
+ bool unknown_state;
+
+ /**
+ * Priority list of potential placements for this object.
+ */
+ struct intel_memory_region **placements;
+ int n_placements;
+
+ /**
+ * Memory region for this object.
+ */
+ struct intel_memory_region *region;
+
+ /**
+ * Memory manager resource allocated for this object. Only
+ * needed for the mock region.
+ */
+ struct ttm_resource *res;
+
+ /**
+ * Element within memory_region->objects or region->purgeable
+ * if the object is marked as DONTNEED. Access is protected by
+ * region->obj_lock.
+ */
+ struct list_head region_link;
+
+ struct i915_refct_sgt *rsgt;
+ struct sg_table *pages;
+ void *mapping;
+
+ struct i915_page_sizes page_sizes;
+
+ I915_SELFTEST_DECLARE(unsigned int page_mask);
+
+ struct i915_gem_object_page_iter get_page;
+ struct i915_gem_object_page_iter get_dma_page;
+
+ /**
+ * Element within i915->mm.shrink_list or i915->mm.purge_list,
+ * locked by i915->mm.obj_lock.
+ */
+ struct list_head link;
+
+ /**
+ * Advice: are the backing pages purgeable?
+ */
+ unsigned int madv:2;
+
+ /**
+ * This is set if the object has been written to since the
+ * pages were last acquired.
+ */
+ bool dirty:1;
+
+ u32 tlb[I915_MAX_GT];
+ } mm;
+
+ struct {
+ struct i915_refct_sgt *cached_io_rsgt;
+ struct i915_gem_object_page_iter get_io_page;
+ struct drm_i915_gem_object *backup;
+ bool created:1;
+ } ttm;
+
+ /*
+ * Record which PXP key instance this object was created against (if
+ * any), so we can use it to determine if the encryption is valid by
+ * comparing against the current key instance.
+ */
+ u32 pxp_key_instance;
+
+ /** Record of address bit 17 of each page at last unbind. */
+ unsigned long *bit_17;
+
+ union {
+#ifdef CONFIG_MMU_NOTIFIER
+ struct i915_gem_userptr {
+ uintptr_t ptr;
+ unsigned long notifier_seq;
+
+ struct mmu_interval_notifier notifier;
+ struct page **pvec;
+ int page_ref;
+ } userptr;
+#endif
+
+ struct drm_mm_node *stolen;
+
+ resource_size_t bo_offset;
+
+ unsigned long scratch;
+ u64 encode;
+
+ void *gvt_info;
+ };
+};
+
+#define intel_bo_to_drm_bo(bo) (&(bo)->base)
+#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)
+
+static inline struct drm_i915_gem_object *
+to_intel_bo(struct drm_gem_object *gem)
+{
+ /* Assert that to_intel_bo(NULL) == NULL */
+ BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));
+
+ return container_of(gem, struct drm_i915_gem_object, base);
+}
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
new file mode 100644
index 0000000000..0ba955611d
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -0,0 +1,675 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <drm/drm_cache.h>
+
+#include "gt/intel_gt.h"
+#include "gt/intel_tlb.h"
+
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_scatterlist.h"
+#include "i915_gem_lmem.h"
+#include "i915_gem_mman.h"
+
+void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ unsigned long supported = RUNTIME_INFO(i915)->page_sizes;
+ bool shrinkable;
+ int i;
+
+ assert_object_held_shared(obj);
+
+ if (i915_gem_object_is_volatile(obj))
+ obj->mm.madv = I915_MADV_DONTNEED;
+
+ /* Make the pages coherent with the GPU (flushing any swapin). */
+ if (obj->cache_dirty) {
+ WARN_ON_ONCE(IS_DGFX(i915));
+ obj->write_domain = 0;
+ if (i915_gem_object_has_struct_page(obj))
+ drm_clflush_sg(pages);
+ obj->cache_dirty = false;
+ }
+
+ obj->mm.get_page.sg_pos = pages->sgl;
+ obj->mm.get_page.sg_idx = 0;
+ obj->mm.get_dma_page.sg_pos = pages->sgl;
+ obj->mm.get_dma_page.sg_idx = 0;
+
+ obj->mm.pages = pages;
+
+ obj->mm.page_sizes.phys = i915_sg_dma_sizes(pages->sgl);
+ GEM_BUG_ON(!obj->mm.page_sizes.phys);
+
+ /*
+ * Calculate the supported page-sizes which fit into the given
+ * sg_page_sizes. This will give us the page-sizes which we may be able
+ * to use opportunistically when later inserting into the GTT. For
+ * example if phys=2G, then in theory we should be able to use 1G, 2M,
+ * 64K or 4K pages, although in practice this will depend on a number of
+ * other factors.
+ */
+ obj->mm.page_sizes.sg = 0;
+ for_each_set_bit(i, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+ if (obj->mm.page_sizes.phys & ~0u << i)
+ obj->mm.page_sizes.sg |= BIT(i);
+ }
+ GEM_BUG_ON(!HAS_PAGE_SIZES(i915, obj->mm.page_sizes.sg));
+
+ shrinkable = i915_gem_object_is_shrinkable(obj);
+
+ if (i915_gem_object_is_tiled(obj) &&
+ i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES) {
+ GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
+ i915_gem_object_set_tiling_quirk(obj);
+ GEM_BUG_ON(!list_empty(&obj->mm.link));
+ atomic_inc(&obj->mm.shrink_pin);
+ shrinkable = false;
+ }
+
+ if (shrinkable && !i915_gem_object_has_self_managed_shrink_list(obj)) {
+ struct list_head *list;
+ unsigned long flags;
+
+ assert_object_held(obj);
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+
+ i915->mm.shrink_count++;
+ i915->mm.shrink_memory += obj->base.size;
+
+ if (obj->mm.madv != I915_MADV_WILLNEED)
+ list = &i915->mm.purge_list;
+ else
+ list = &i915->mm.shrink_list;
+ list_add_tail(&obj->mm.link, list);
+
+ atomic_set(&obj->mm.shrink_pin, 0);
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+ }
+}
+
+int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ int err;
+
+ assert_object_held_shared(obj);
+
+ if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+ drm_dbg(&i915->drm,
+ "Attempting to obtain a purgeable object\n");
+ return -EFAULT;
+ }
+
+ err = obj->ops->get_pages(obj);
+ GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj));
+
+ return err;
+}
+
+/* Ensure that the associated pages are gathered from the backing storage
+ * and pinned into our object. i915_gem_object_pin_pages() may be called
+ * multiple times before they are released by a single call to
+ * i915_gem_object_unpin_pages() - once the pages are no longer referenced
+ * either as a result of memory pressure (reaping pages under the shrinker)
+ * or as the object is itself released.
+ */
+int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+ int err;
+
+ assert_object_held(obj);
+
+ assert_object_held_shared(obj);
+
+ if (unlikely(!i915_gem_object_has_pages(obj))) {
+ GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+ err = ____i915_gem_object_get_pages(obj);
+ if (err)
+ return err;
+
+ smp_mb__before_atomic();
+ }
+ atomic_inc(&obj->mm.pages_pin_count);
+
+ return 0;
+}
+
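+/*
+ * Convenience wrapper: take the object lock through a ww acquire context,
+ * pin the pages, and transparently retry after an -EDEADLK backoff.
+ */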
+int i915_gem_object_pin_pages_unlocked(struct drm_i915_gem_object *obj)
+{
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ i915_gem_ww_ctx_init(&ww, true);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_gem_object_pin_pages(obj);
+
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ return err;
+}
+
+/* Immediately discard the backing storage */
+int i915_gem_object_truncate(struct drm_i915_gem_object *obj)
+{
+ if (obj->ops->truncate)
+ return obj->ops->truncate(obj);
+
+ return 0;
+}
+
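+/* Drop the cached radix-tree lookups built up by the two page iterators. */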
+static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &obj->mm.get_page.radix, &iter, 0)
+ radix_tree_delete(&obj->mm.get_page.radix, iter.index);
+ radix_tree_for_each_slot(slot, &obj->mm.get_dma_page.radix, &iter, 0)
+ radix_tree_delete(&obj->mm.get_dma_page.radix, iter.index);
+ rcu_read_unlock();
+}
+
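+/*
+ * Single-page WB mappings come straight from page_address() and need no
+ * teardown; only vmap()/vmap_pfn() mappings are unmapped here.
+ */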
+static void unmap_object(struct drm_i915_gem_object *obj, void *ptr)
+{
+ if (is_vmalloc_addr(ptr))
+ vunmap(ptr);
+}
+
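+/*
+ * Invalidate any GT TLB entries that may still reference the pages being
+ * released.
+ */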
+static void flush_tlb_invalidate(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_gt *gt;
+ int id;
+
+ for_each_gt(gt, i915, id) {
+ if (!obj->mm.tlb[id])
+ continue;
+
+ intel_gt_invalidate_tlb_full(gt, obj->mm.tlb[id]);
+ obj->mm.tlb[id] = 0;
+ }
+}
+
+struct sg_table *
+__i915_gem_object_unset_pages(struct drm_i915_gem_object *obj)
+{
+ struct sg_table *pages;
+
+ assert_object_held_shared(obj);
+
+ pages = fetch_and_zero(&obj->mm.pages);
+ if (IS_ERR_OR_NULL(pages))
+ return pages;
+
+ if (i915_gem_object_is_volatile(obj))
+ obj->mm.madv = I915_MADV_WILLNEED;
+
+ if (!i915_gem_object_has_self_managed_shrink_list(obj))
+ i915_gem_object_make_unshrinkable(obj);
+
+ if (obj->mm.mapping) {
+ unmap_object(obj, page_mask_bits(obj->mm.mapping));
+ obj->mm.mapping = NULL;
+ }
+
+ __i915_gem_object_reset_page_iter(obj);
+ obj->mm.page_sizes.phys = obj->mm.page_sizes.sg = 0;
+
+ flush_tlb_invalidate(obj);
+
+ return pages;
+}
+
+int __i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
+{
+ struct sg_table *pages;
+
+ if (i915_gem_object_has_pinned_pages(obj))
+ return -EBUSY;
+
+ /* May be called by shrinker from within get_pages() (on another bo) */
+ assert_object_held_shared(obj);
+
+ i915_gem_object_release_mmap_offset(obj);
+
+ /*
+ * ->put_pages might need to allocate memory for the bit17 swizzle
+ * array, hence protect them from being reaped by removing them from gtt
+ * lists early.
+ */
+ pages = __i915_gem_object_unset_pages(obj);
+
+ /*
+ * XXX Temporary hijinx to avoid updating all backends to handle
+ * NULL pages. In the future, when we have more asynchronous
+ * get_pages backends we should be better able to handle the
+ * cancellation of the async task in a more uniform manner.
+ */
+ if (!IS_ERR_OR_NULL(pages))
+ obj->ops->put_pages(obj, pages);
+
+ return 0;
+}
+
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj,
+ enum i915_map_type type)
+{
+ unsigned long n_pages = obj->base.size >> PAGE_SHIFT, i;
+ struct page *stack[32], **pages = stack, *page;
+ struct sgt_iter iter;
+ pgprot_t pgprot;
+ void *vaddr;
+
+ switch (type) {
+ default:
+ MISSING_CASE(type);
+ fallthrough; /* to use PAGE_KERNEL anyway */
+ case I915_MAP_WB:
+ /*
+ * On 32b, highmem uses a finite set of indirect PTE (i.e.
+ * vmap) to provide virtual mappings of the high pages.
+ * As these are finite, map_new_virtual() must wait for some
+ * other kmap() to finish when it runs out. If we map a large
+ * number of objects, there is no method for it to tell us
+ * to release the mappings, and we deadlock.
+ *
+ * However, if we make an explicit vmap of the page, that
+ * uses a larger vmalloc arena, and also has the ability
+ * to tell us to release unwanted mappings. Most importantly,
+ * it will fail and propagate an error instead of waiting
+ * forever.
+ *
+ * So if the page is beyond the 32b boundary, make an explicit
+ * vmap.
+ */
+ if (n_pages == 1 && !PageHighMem(sg_page(obj->mm.pages->sgl)))
+ return page_address(sg_page(obj->mm.pages->sgl));
+ pgprot = PAGE_KERNEL;
+ break;
+ case I915_MAP_WC:
+ pgprot = pgprot_writecombine(PAGE_KERNEL_IO);
+ break;
+ }
+
+ if (n_pages > ARRAY_SIZE(stack)) {
+ /* Too big for stack -- allocate temporary array instead */
+ pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL);
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ i = 0;
+ for_each_sgt_page(page, iter, obj->mm.pages)
+ pages[i++] = page;
+ vaddr = vmap(pages, n_pages, 0, pgprot);
+ if (pages != stack)
+ kvfree(pages);
+
+ return vaddr ?: ERR_PTR(-ENOMEM);
+}
+
+static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
+ enum i915_map_type type)
+{
+ resource_size_t iomap = obj->mm.region->iomap.base -
+ obj->mm.region->region.start;
+ unsigned long n_pfn = obj->base.size >> PAGE_SHIFT;
+ unsigned long stack[32], *pfns = stack, i;
+ struct sgt_iter iter;
+ dma_addr_t addr;
+ void *vaddr;
+
+ GEM_BUG_ON(type != I915_MAP_WC);
+
+ if (n_pfn > ARRAY_SIZE(stack)) {
+ /* Too big for stack -- allocate temporary array instead */
+ pfns = kvmalloc_array(n_pfn, sizeof(*pfns), GFP_KERNEL);
+ if (!pfns)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ i = 0;
+ for_each_sgt_daddr(addr, iter, obj->mm.pages)
+ pfns[i++] = (iomap + addr) >> PAGE_SHIFT;
+ vaddr = vmap_pfn(pfns, n_pfn, pgprot_writecombine(PAGE_KERNEL_IO));
+ if (pfns != stack)
+ kvfree(pfns);
+
+ return vaddr ?: ERR_PTR(-ENOMEM);
+}
+
+/* get, pin, and map the pages of the object into kernel space */
+void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
+ enum i915_map_type type)
+{
+ enum i915_map_type has_type;
+ bool pinned;
+ void *ptr;
+ int err;
+
+ if (!i915_gem_object_has_struct_page(obj) &&
+ !i915_gem_object_has_iomem(obj))
+ return ERR_PTR(-ENXIO);
+
+ if (WARN_ON_ONCE(obj->flags & I915_BO_ALLOC_GPU_ONLY))
+ return ERR_PTR(-EINVAL);
+
+ assert_object_held(obj);
+
+ pinned = !(type & I915_MAP_OVERRIDE);
+ type &= ~I915_MAP_OVERRIDE;
+
+ if (!atomic_inc_not_zero(&obj->mm.pages_pin_count)) {
+ if (unlikely(!i915_gem_object_has_pages(obj))) {
+ GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj));
+
+ err = ____i915_gem_object_get_pages(obj);
+ if (err)
+ return ERR_PTR(err);
+
+ smp_mb__before_atomic();
+ }
+ atomic_inc(&obj->mm.pages_pin_count);
+ pinned = false;
+ }
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+
+ /*
+ * For discrete our CPU mappings need to be consistent in order to
+ * function correctly on !x86. When mapping things through TTM, we use
+ * the same rules to determine the caching type.
+ *
+ * The caching rules, starting from DG1:
+ *
+ * - If the object can be placed in device local-memory, then the
+ * pages should be allocated and mapped as write-combined only.
+ *
+ * - Everything else is always allocated and mapped as write-back,
+ * with the guarantee that everything is also coherent with the
+ * GPU.
+ *
+ * Internal users of lmem are already expected to get this right, so no
+ * fudging needed there.
+ */
+ if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) {
+ if (type != I915_MAP_WC && !obj->mm.n_placements) {
+ ptr = ERR_PTR(-ENODEV);
+ goto err_unpin;
+ }
+
+ type = I915_MAP_WC;
+ } else if (IS_DGFX(to_i915(obj->base.dev))) {
+ type = I915_MAP_WB;
+ }
+
+ ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+ if (ptr && has_type != type) {
+ if (pinned) {
+ ptr = ERR_PTR(-EBUSY);
+ goto err_unpin;
+ }
+
+ unmap_object(obj, ptr);
+
+ ptr = obj->mm.mapping = NULL;
+ }
+
+ if (!ptr) {
+ err = i915_gem_object_wait_moving_fence(obj, true);
+ if (err) {
+ ptr = ERR_PTR(err);
+ goto err_unpin;
+ }
+
+ if (GEM_WARN_ON(type == I915_MAP_WC && !pat_enabled()))
+ ptr = ERR_PTR(-ENODEV);
+ else if (i915_gem_object_has_struct_page(obj))
+ ptr = i915_gem_object_map_page(obj, type);
+ else
+ ptr = i915_gem_object_map_pfn(obj, type);
+ if (IS_ERR(ptr))
+ goto err_unpin;
+
+ obj->mm.mapping = page_pack_bits(ptr, type);
+ }
+
+ return ptr;
+
+err_unpin:
+ atomic_dec(&obj->mm.pages_pin_count);
+ return ptr;
+}
+
+void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
+ enum i915_map_type type)
+{
+ void *ret;
+
+ i915_gem_object_lock(obj, NULL);
+ ret = i915_gem_object_pin_map(obj, type);
+ i915_gem_object_unlock(obj);
+
+ return ret;
+}
+
+void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
+ unsigned long offset,
+ unsigned long size)
+{
+ enum i915_map_type has_type;
+ void *ptr;
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ GEM_BUG_ON(range_overflows_t(typeof(obj->base.size),
+ offset, size, obj->base.size));
+
+ wmb(); /* let all previous writes be visible to coherent partners */
+ obj->mm.dirty = true;
+
+ if (obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)
+ return;
+
+ ptr = page_unpack_bits(obj->mm.mapping, &has_type);
+ if (has_type == I915_MAP_WC)
+ return;
+
+ drm_clflush_virt_range(ptr + offset, size);
+ if (size == obj->base.size) {
+ obj->write_domain &= ~I915_GEM_DOMAIN_CPU;
+ obj->cache_dirty = false;
+ }
+}
+
+void __i915_gem_object_release_map(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!obj->mm.mapping);
+
+ /*
+ * We allow removing the mapping from underneath pinned pages!
+ *
+ * Furthermore, since this is an unsafe operation reserved only
+ * for construction time manipulation, we ignore locking prudence.
+ */
+ unmap_object(obj, page_mask_bits(fetch_and_zero(&obj->mm.mapping)));
+
+ i915_gem_object_unpin_map(obj);
+}
+
+struct scatterlist *
+__i915_gem_object_page_iter_get_sg(struct drm_i915_gem_object *obj,
+ struct i915_gem_object_page_iter *iter,
+ pgoff_t n,
+ unsigned int *offset)
+
+{
+ const bool dma = iter == &obj->mm.get_dma_page ||
+ iter == &obj->ttm.get_io_page;
+ unsigned int idx, count;
+ struct scatterlist *sg;
+
+ might_sleep();
+ GEM_BUG_ON(n >= obj->base.size >> PAGE_SHIFT);
+ if (!i915_gem_object_has_pinned_pages(obj))
+ assert_object_held(obj);
+
+ /* As we iterate forward through the sg, we record each entry in a
+ * radixtree for quick repeated (backwards) lookups. If we have seen
+ * this index previously, we will have an entry for it.
+ *
+ * Initial lookup is O(N), but this is amortized to O(1) for
+ * sequential page access (where each new request is consecutive
+ * to the previous one). Repeated lookups are O(lg(obj->base.size)),
+ * i.e. O(1) with a large constant!
+ */
+ if (n < READ_ONCE(iter->sg_idx))
+ goto lookup;
+
+ mutex_lock(&iter->lock);
+
+ /* We prefer to reuse the last sg so that repeated lookup of this
+ * (or the subsequent) sg are fast - comparing against the last
+ * sg is faster than going through the radixtree.
+ */
+
+ sg = iter->sg_pos;
+ idx = iter->sg_idx;
+ count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
+
+ while (idx + count <= n) {
+ void *entry;
+ unsigned long i;
+ int ret;
+
+ /* If we cannot allocate and insert this entry, or the
+ * individual pages from this range, cancel updating the
+ * sg_idx so that on this lookup we are forced to linearly
+ * scan onwards, but on future lookups we will try the
+ * insertion again (in which case we need to be careful of
+ * the error return reporting that we have already inserted
+ * this index).
+ */
+ ret = radix_tree_insert(&iter->radix, idx, sg);
+ if (ret && ret != -EEXIST)
+ goto scan;
+
+ entry = xa_mk_value(idx);
+ for (i = 1; i < count; i++) {
+ ret = radix_tree_insert(&iter->radix, idx + i, entry);
+ if (ret && ret != -EEXIST)
+ goto scan;
+ }
+
+ idx += count;
+ sg = ____sg_next(sg);
+ count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
+ }
+
+scan:
+ iter->sg_pos = sg;
+ iter->sg_idx = idx;
+
+ mutex_unlock(&iter->lock);
+
+ if (unlikely(n < idx)) /* insertion completed by another thread */
+ goto lookup;
+
+ /* In case we failed to insert the entry into the radixtree, we need
+ * to look beyond the current sg.
+ */
+ while (idx + count <= n) {
+ idx += count;
+ sg = ____sg_next(sg);
+ count = dma ? __sg_dma_page_count(sg) : __sg_page_count(sg);
+ }
+
+ *offset = n - idx;
+ return sg;
+
+lookup:
+ rcu_read_lock();
+
+ sg = radix_tree_lookup(&iter->radix, n);
+ GEM_BUG_ON(!sg);
+
+ /* If this index is in the middle of multi-page sg entry,
+ * the radix tree will contain a value entry that points
+ * to the start of that range. We will return the pointer to
+ * the base page and the offset of this page within the
+ * sg entry's range.
+ */
+ *offset = 0;
+ if (unlikely(xa_is_value(sg))) {
+ unsigned long base = xa_to_value(sg);
+
+ sg = radix_tree_lookup(&iter->radix, base);
+ GEM_BUG_ON(!sg);
+
+ *offset = n - base;
+ }
+
+ rcu_read_unlock();
+
+ return sg;
+}
+
+struct page *
+__i915_gem_object_get_page(struct drm_i915_gem_object *obj, pgoff_t n)
+{
+ struct scatterlist *sg;
+ unsigned int offset;
+
+ GEM_BUG_ON(!i915_gem_object_has_struct_page(obj));
+
+ sg = i915_gem_object_get_sg(obj, n, &offset);
+ return nth_page(sg_page(sg), offset);
+}
+
+/* Like i915_gem_object_get_page(), but mark the returned page dirty */
+struct page *
+__i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, pgoff_t n)
+{
+ struct page *page;
+
+ page = i915_gem_object_get_page(obj, n);
+ if (!obj->mm.dirty)
+ set_page_dirty(page);
+
+ return page;
+}
+
+dma_addr_t
+__i915_gem_object_get_dma_address_len(struct drm_i915_gem_object *obj,
+ pgoff_t n, unsigned int *len)
+{
+ struct scatterlist *sg;
+ unsigned int offset;
+
+ sg = i915_gem_object_get_sg_dma(obj, n, &offset);
+
+ if (len)
+ *len = sg_dma_len(sg) - (offset << PAGE_SHIFT);
+
+ return sg_dma_address(sg) + (offset << PAGE_SHIFT);
+}
+
+dma_addr_t
+__i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, pgoff_t n)
+{
+ return i915_gem_object_get_dma_address_len(obj, n, NULL);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
new file mode 100644
index 0000000000..76efe98eaa
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_phys.c
@@ -0,0 +1,260 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/highmem.h>
+#include <linux/shmem_fs.h>
+#include <linux/swap.h>
+
+#include <drm/drm_cache.h>
+
+#include "gt/intel_gt.h"
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_gem_region.h"
+#include "i915_gem_tiling.h"
+#include "i915_scatterlist.h"
+
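+/*
+ * Replace the object's shmem backing store with a single physically
+ * contiguous, DMA-coherent allocation, copying the existing page contents
+ * across.
+ */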
+static int i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
+{
+ struct address_space *mapping = obj->base.filp->f_mapping;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct scatterlist *sg;
+ struct sg_table *st;
+ dma_addr_t dma;
+ void *vaddr;
+ void *dst;
+ int i;
+
+ /* Contiguous chunk, with a single scatterlist element */
+ if (overflows_type(obj->base.size, sg->length))
+ return -E2BIG;
+
+ if (GEM_WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
+ return -EINVAL;
+
+ /*
+ * Always aligning to the object size allows a single allocation
+ * to handle all possible callers, and given typical object sizes,
+ * the alignment of the buddy allocation will naturally match.
+ */
+ vaddr = dma_alloc_coherent(obj->base.dev->dev,
+ roundup_pow_of_two(obj->base.size),
+ &dma, GFP_KERNEL);
+ if (!vaddr)
+ return -ENOMEM;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ goto err_pci;
+
+ if (sg_alloc_table(st, 1, GFP_KERNEL))
+ goto err_st;
+
+ sg = st->sgl;
+ sg->offset = 0;
+ sg->length = obj->base.size;
+
+ sg_assign_page(sg, (struct page *)vaddr);
+ sg_dma_address(sg) = dma;
+ sg_dma_len(sg) = obj->base.size;
+
+ dst = vaddr;
+ for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+ struct page *page;
+ void *src;
+
+ page = shmem_read_mapping_page(mapping, i);
+ if (IS_ERR(page))
+ goto err_st;
+
+ src = kmap_atomic(page);
+ memcpy(dst, src, PAGE_SIZE);
+ drm_clflush_virt_range(dst, PAGE_SIZE);
+ kunmap_atomic(src);
+
+ put_page(page);
+ dst += PAGE_SIZE;
+ }
+
+ intel_gt_chipset_flush(to_gt(i915));
+
+ /* We're no longer struct page backed */
+ obj->mem_flags &= ~I915_BO_FLAG_STRUCT_PAGE;
+ __i915_gem_object_set_pages(obj, st);
+
+ return 0;
+
+err_st:
+ kfree(st);
+err_pci:
+ dma_free_coherent(obj->base.dev->dev,
+ roundup_pow_of_two(obj->base.size),
+ vaddr, dma);
+ return -ENOMEM;
+}
+
+void
+i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ dma_addr_t dma = sg_dma_address(pages->sgl);
+ void *vaddr = sg_page(pages->sgl);
+
+ __i915_gem_object_release_shmem(obj, pages, false);
+
+ if (obj->mm.dirty) {
+ struct address_space *mapping = obj->base.filp->f_mapping;
+ void *src = vaddr;
+ int i;
+
+ for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+ struct page *page;
+ char *dst;
+
+ page = shmem_read_mapping_page(mapping, i);
+ if (IS_ERR(page))
+ continue;
+
+ dst = kmap_atomic(page);
+ drm_clflush_virt_range(src, PAGE_SIZE);
+ memcpy(dst, src, PAGE_SIZE);
+ kunmap_atomic(dst);
+
+ set_page_dirty(page);
+ if (obj->mm.madv == I915_MADV_WILLNEED)
+ mark_page_accessed(page);
+ put_page(page);
+
+ src += PAGE_SIZE;
+ }
+ obj->mm.dirty = false;
+ }
+
+ sg_free_table(pages);
+ kfree(pages);
+
+ dma_free_coherent(obj->base.dev->dev,
+ roundup_pow_of_two(obj->base.size),
+ vaddr, dma);
+}
+
+int i915_gem_object_pwrite_phys(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *args)
+{
+ void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
+ char __user *user_data = u64_to_user_ptr(args->data_ptr);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ int err;
+
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ return err;
+
+ /*
+ * We manually control the domain here and pretend that it
+ * remains coherent i.e. in the GTT domain, like shmem_pwrite.
+ */
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
+
+ if (copy_from_user(vaddr, user_data, args->size))
+ return -EFAULT;
+
+ drm_clflush_virt_range(vaddr, args->size);
+ intel_gt_chipset_flush(to_gt(i915));
+
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
+ return 0;
+}
+
+int i915_gem_object_pread_phys(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *args)
+{
+ void *vaddr = sg_page(obj->mm.pages->sgl) + args->offset;
+ char __user *user_data = u64_to_user_ptr(args->data_ptr);
+ int err;
+
+ err = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ return err;
+
+ drm_clflush_virt_range(vaddr, args->size);
+ if (copy_to_user(user_data, vaddr, args->size))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int i915_gem_object_shmem_to_phys(struct drm_i915_gem_object *obj)
+{
+ struct sg_table *pages;
+ int err;
+
+ pages = __i915_gem_object_unset_pages(obj);
+
+ err = i915_gem_object_get_pages_phys(obj);
+ if (err)
+ goto err_xfer;
+
+ /* Perma-pin (until release) the physical set of pages */
+ __i915_gem_object_pin_pages(obj);
+
+ if (!IS_ERR_OR_NULL(pages))
+ i915_gem_object_put_pages_shmem(obj, pages);
+
+ i915_gem_object_release_memory_region(obj);
+ return 0;
+
+err_xfer:
+ if (!IS_ERR_OR_NULL(pages))
+ __i915_gem_object_set_pages(obj, pages);
+ return err;
+}
+
+int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, int align)
+{
+ int err;
+
+ assert_object_held(obj);
+
+ if (align > obj->base.size)
+ return -EINVAL;
+
+ if (!i915_gem_object_is_shmem(obj))
+ return -EINVAL;
+
+ if (!i915_gem_object_has_struct_page(obj))
+ return 0;
+
+ err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ if (err)
+ return err;
+
+ if (obj->mm.madv != I915_MADV_WILLNEED)
+ return -EFAULT;
+
+ if (i915_gem_object_has_tiling_quirk(obj))
+ return -EFAULT;
+
+ if (obj->mm.mapping || i915_gem_object_has_pinned_pages(obj))
+ return -EBUSY;
+
+ if (unlikely(obj->mm.madv != I915_MADV_WILLNEED)) {
+ drm_dbg(obj->base.dev,
+ "Attempting to obtain a purgeable object\n");
+ return -EFAULT;
+ }
+
+ return i915_gem_object_shmem_to_phys(obj);
+}
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/i915_gem_phys.c"
+#endif
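A minimal, hypothetical sketch of a caller for i915_gem_object_attach_phys() shown above, assuming the i915 GEM headers; the wrapper name and the PAGE_SIZE alignment are invented for illustration and are not taken from this patch. Since attach_phys() asserts that the object lock is held, the sketch takes it via the usual ww retry loop.

static int example_attach_phys(struct drm_i915_gem_object *obj)
{
	struct i915_gem_ww_ctx ww;
	int err;

	/* attach_phys() asserts the object lock, so take it in a ww loop */
	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		/* Swap the shmem backing store for DMA-coherent memory */
		err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
	}

	return err;
}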
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
new file mode 100644
index 0000000000..0d812f4d78
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -0,0 +1,256 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "gem/i915_gem_pm.h"
+#include "gem/i915_gem_ttm_pm.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_gt_requests.h"
+
+#include "i915_driver.h"
+#include "i915_drv.h"
+
+#if defined(CONFIG_X86)
+#include <asm/smp.h>
+#else
+#define wbinvd_on_all_cpus() \
+ pr_warn(DRIVER_NAME ": Missing cache flush in %s\n", __func__)
+#endif
+
+void i915_gem_suspend(struct drm_i915_private *i915)
+{
+ struct intel_gt *gt;
+ unsigned int i;
+
+ GEM_TRACE("%s\n", dev_name(i915->drm.dev));
+
+ intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0);
+ flush_workqueue(i915->wq);
+
+ /*
+ * We have to flush all the executing contexts to main memory so
+ * that they can be saved in the hibernation image. To ensure the last
+ * context image is coherent, we have to switch away from it. That
+ * leaves the i915->kernel_context still active when
+ * we actually suspend, and its image in memory may not match the GPU
+ * state. Fortunately, the kernel_context is disposable and we do
+ * not rely on its state.
+ */
+ for_each_gt(gt, i915, i)
+ intel_gt_suspend_prepare(gt);
+
+ i915_gem_drain_freed_objects(i915);
+}
+
+static int lmem_restore(struct drm_i915_private *i915, u32 flags)
+{
+ struct intel_memory_region *mr;
+ int ret = 0, id;
+
+ for_each_memory_region(mr, i915, id) {
+ if (mr->type == INTEL_MEMORY_LOCAL) {
+ ret = i915_ttm_restore_region(mr, flags);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int lmem_suspend(struct drm_i915_private *i915, u32 flags)
+{
+ struct intel_memory_region *mr;
+ int ret = 0, id;
+
+ for_each_memory_region(mr, i915, id) {
+ if (mr->type == INTEL_MEMORY_LOCAL) {
+ ret = i915_ttm_backup_region(mr, flags);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void lmem_recover(struct drm_i915_private *i915)
+{
+ struct intel_memory_region *mr;
+ int id;
+
+ for_each_memory_region(mr, i915, id)
+ if (mr->type == INTEL_MEMORY_LOCAL)
+ i915_ttm_recover_region(mr);
+}
+
+int i915_gem_backup_suspend(struct drm_i915_private *i915)
+{
+ int ret;
+
+ /* Opportunistically try to evict unpinned objects */
+ ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU);
+ if (ret)
+ goto out_recover;
+
+ i915_gem_suspend(i915);
+
+ /*
+ * More objects may have become unpinned as requests were
+ * retired. Now try to evict again. The gt may be wedged here
+ * in which case we automatically fall back to memcpy.
+ * We also allow backing up pinned objects that have not been
+ * marked for early recovery, and that may contain, for example,
+ * page-tables for the migrate context.
+ */
+ ret = lmem_suspend(i915, I915_TTM_BACKUP_ALLOW_GPU |
+ I915_TTM_BACKUP_PINNED);
+ if (ret)
+ goto out_recover;
+
+ /*
+ * Remaining objects are backed up using memcpy once we've stopped
+ * using the migrate context.
+ */
+ ret = lmem_suspend(i915, I915_TTM_BACKUP_PINNED);
+ if (ret)
+ goto out_recover;
+
+ return 0;
+
+out_recover:
+ lmem_recover(i915);
+
+ return ret;
+}
+
+void i915_gem_suspend_late(struct drm_i915_private *i915)
+{
+ struct drm_i915_gem_object *obj;
+ struct list_head *phases[] = {
+ &i915->mm.shrink_list,
+ &i915->mm.purge_list,
+ NULL
+ }, **phase;
+ struct intel_gt *gt;
+ unsigned long flags;
+ unsigned int i;
+ bool flush = false;
+
+ /*
+ * Neither the BIOS, ourselves, nor any other kernel
+ * expects the system to be in execlists mode on startup,
+ * so we need to reset the GPU back to legacy mode. And the only
+ * known way to disable logical contexts is through a GPU reset.
+ *
+ * So in order to leave the system in a known default configuration,
+ * always reset the GPU upon unload and suspend. Afterwards we then
+ * clean up the GEM state tracking, flushing off the requests and
+ * leaving the system in a known idle state.
+ *
+ * Note that it is of the utmost importance that the GPU is idle and
+ * all stray writes are flushed *before* we dismantle the backing
+ * storage for the pinned objects.
+ *
+ * However, since we are uncertain that resetting the GPU on older
+ * machines is a good idea, we don't - just in case it leaves the
+ * machine in an unusable condition.
+ */
+
+ for_each_gt(gt, i915, i)
+ intel_gt_suspend_late(gt);
+
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ for (phase = phases; *phase; phase++) {
+ list_for_each_entry(obj, *phase, mm.link) {
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+ flush |= (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0;
+ __start_cpu_write(obj); /* presume auto-hibernate */
+ }
+ }
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+ if (flush)
+ wbinvd_on_all_cpus();
+}
+
+int i915_gem_freeze(struct drm_i915_private *i915)
+{
+ /* Discard all purgeable objects, let userspace recover those as
+ * required after resuming.
+ */
+ i915_gem_shrink_all(i915);
+
+ return 0;
+}
+
+int i915_gem_freeze_late(struct drm_i915_private *i915)
+{
+ struct drm_i915_gem_object *obj;
+ intel_wakeref_t wakeref;
+
+ /*
+ * Called just before we write the hibernation image.
+ *
+ * We need to update the domain tracking to reflect that the CPU
+ * will be accessing all the pages to create and restore from the
+ * hibernation, and so upon restoration those pages will be in the
+ * CPU domain.
+ *
+ * To make sure the hibernation image contains the latest state,
+ * we update that state just before writing out the image.
+ *
+ * To try and reduce the hibernation image, we manually shrink
+ * the objects as well, see i915_gem_freeze()
+ */
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ i915_gem_shrink(NULL, i915, -1UL, NULL, ~0);
+ i915_gem_drain_freed_objects(i915);
+
+ wbinvd_on_all_cpus();
+ list_for_each_entry(obj, &i915->mm.shrink_list, mm.link)
+ __start_cpu_write(obj);
+
+ return 0;
+}
+
+void i915_gem_resume(struct drm_i915_private *i915)
+{
+ struct intel_gt *gt;
+ int ret, i, j;
+
+ GEM_TRACE("%s\n", dev_name(i915->drm.dev));
+
+ ret = lmem_restore(i915, 0);
+ GEM_WARN_ON(ret);
+
+ /*
+ * As we didn't flush the kernel context before suspend, we cannot
+ * guarantee that the context image is complete. So let's just reset
+ * it and start again.
+ */
+ for_each_gt(gt, i915, i)
+ if (intel_gt_resume(gt))
+ goto err_wedged;
+
+ ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
+ GEM_WARN_ON(ret);
+
+ return;
+
+err_wedged:
+ for_each_gt(gt, i915, j) {
+ if (!intel_gt_is_wedged(gt)) {
+ dev_err(i915->drm.dev,
+ "Failed to re-initialize GPU[%u], declaring it wedged!\n",
+ j);
+ intel_gt_set_wedged(gt);
+ }
+
+ if (j == i)
+ break;
+ }
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
new file mode 100644
index 0000000000..bedf1e9594
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.h
@@ -0,0 +1,26 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_PM_H__
+#define __I915_GEM_PM_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct work_struct;
+
+void i915_gem_resume(struct drm_i915_private *i915);
+
+void i915_gem_idle_work_handler(struct work_struct *work);
+
+void i915_gem_suspend(struct drm_i915_private *i915);
+void i915_gem_suspend_late(struct drm_i915_private *i915);
+int i915_gem_backup_suspend(struct drm_i915_private *i915);
+
+int i915_gem_freeze(struct drm_i915_private *i915);
+int i915_gem_freeze_late(struct drm_i915_private *i915);
+
+#endif /* __I915_GEM_PM_H__ */
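A hypothetical sketch of how these entry points are expected to be sequenced, inferred only from the comments in i915_gem_pm.c; the wrapper names below are invented and the real driver PM callbacks may differ.

static int example_hibernate_prepare(struct drm_i915_private *i915)
{
	int err;

	/* Backs up LMEM in several passes and flushes contexts to memory */
	err = i915_gem_backup_suspend(i915);
	if (err)
		return err;

	/* Final idling/reset once backing storage must no longer change */
	i915_gem_suspend_late(i915);
	return 0;
}

static void example_resume(struct drm_i915_private *i915)
{
	/* Restores LMEM contents and re-initialises the GTs */
	i915_gem_resume(i915);
}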
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c
new file mode 100644
index 0000000000..a4fb577ece
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include <uapi/drm/i915_drm.h>
+
+#include "intel_memory_region.h"
+#include "i915_gem_region.h"
+#include "i915_drv.h"
+#include "i915_trace.h"
+
+void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mem)
+{
+ obj->mm.region = mem;
+
+ mutex_lock(&mem->objects.lock);
+ list_add(&obj->mm.region_link, &mem->objects.list);
+ mutex_unlock(&mem->objects.lock);
+}
+
+void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj)
+{
+ struct intel_memory_region *mem = obj->mm.region;
+
+ mutex_lock(&mem->objects.lock);
+ list_del(&obj->mm.region_link);
+ mutex_unlock(&mem->objects.lock);
+}
+
+static struct drm_i915_gem_object *
+__i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t offset,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj;
+ resource_size_t default_page_size;
+ int err;
+
+ /*
+ * NB: Our use of resource_size_t for the size stems from using struct
+ * resource for the mem->region. We might need to revisit this in the
+ * future.
+ */
+
+ GEM_BUG_ON(flags & ~I915_BO_ALLOC_FLAGS);
+
+ if (WARN_ON_ONCE(flags & I915_BO_ALLOC_GPU_ONLY &&
+ (flags & I915_BO_ALLOC_CPU_CLEAR ||
+ flags & I915_BO_ALLOC_PM_EARLY)))
+ return ERR_PTR(-EINVAL);
+
+ if (!mem)
+ return ERR_PTR(-ENODEV);
+
+ default_page_size = mem->min_page_size;
+ if (page_size)
+ default_page_size = page_size;
+
+ /* We should be able to fit a page within an sg entry */
+ GEM_BUG_ON(overflows_type(default_page_size, u32));
+ GEM_BUG_ON(!is_power_of_2_u64(default_page_size));
+ GEM_BUG_ON(default_page_size < PAGE_SIZE);
+
+ size = round_up(size, default_page_size);
+
+ if (default_page_size == size)
+ flags |= I915_BO_ALLOC_CONTIGUOUS;
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_MIN_ALIGNMENT));
+
+ if (i915_gem_object_size_2big(size))
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * Anything smaller than the min_page_size can't be freely inserted into
+ * the GTT, due to alignment restrictions. For such special objects,
+ * make sure we force memcpy based suspend-resume. In the future we can
+ * revisit this, either by allowing special mis-aligned objects in the
+ * migration path, or by mapping all of LMEM upfront using cheap 1G
+ * GTT entries.
+ */
+ if (default_page_size < mem->min_page_size)
+ flags |= I915_BO_ALLOC_PM_EARLY;
+
+ err = mem->ops->init_object(mem, obj, offset, size, page_size, flags);
+ if (err)
+ goto err_object_free;
+
+ trace_i915_gem_object_create(obj);
+ return obj;
+
+err_object_free:
+ i915_gem_object_free(obj);
+ return ERR_PTR(err);
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
+{
+ return __i915_gem_object_create_region(mem, I915_BO_INVALID_OFFSET,
+ size, page_size, flags);
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region_at(struct intel_memory_region *mem,
+ resource_size_t offset,
+ resource_size_t size,
+ unsigned int flags)
+{
+ GEM_BUG_ON(offset == I915_BO_INVALID_OFFSET);
+
+ if (GEM_WARN_ON(!IS_ALIGNED(size, mem->min_page_size)) ||
+ GEM_WARN_ON(!IS_ALIGNED(offset, mem->min_page_size)))
+ return ERR_PTR(-EINVAL);
+
+ if (range_overflows(offset, size, resource_size(&mem->region)))
+ return ERR_PTR(-EINVAL);
+
+ if (!(flags & I915_BO_ALLOC_GPU_ONLY) &&
+ offset + size > mem->io_size &&
+ !i915_ggtt_has_aperture(to_gt(mem->i915)->ggtt))
+ return ERR_PTR(-ENOSPC);
+
+ return __i915_gem_object_create_region(mem, offset, size, 0,
+ flags | I915_BO_ALLOC_CONTIGUOUS);
+}
+
+/**
+ * i915_gem_process_region - Iterate over all objects of a region using ops
+ * to process and optionally skip objects
+ * @mr: The memory region
+ * @apply: ops and private data
+ *
+ * This function can be used to iterate over the region's object list,
+ * checking whether to skip objects, and, if not, lock the objects and
+ * process them using the supplied ops. Note that this function temporarily
+ * removes objects from the region list while iterating, so that if run
+ * concurrently with itself it may not iterate over all objects.
+ *
+ * Return: 0 if successful, negative error code on failure.
+ */
+int i915_gem_process_region(struct intel_memory_region *mr,
+ struct i915_gem_apply_to_region *apply)
+{
+ const struct i915_gem_apply_to_region_ops *ops = apply->ops;
+ struct drm_i915_gem_object *obj;
+ struct list_head still_in_list;
+ int ret = 0;
+
+ /*
+ * In the future, a non-NULL apply->ww could mean the caller is
+ * already in a locking transaction and provides its own context.
+ */
+ GEM_WARN_ON(apply->ww);
+
+ INIT_LIST_HEAD(&still_in_list);
+ mutex_lock(&mr->objects.lock);
+ for (;;) {
+ struct i915_gem_ww_ctx ww;
+
+ obj = list_first_entry_or_null(&mr->objects.list, typeof(*obj),
+ mm.region_link);
+ if (!obj)
+ break;
+
+ list_move_tail(&obj->mm.region_link, &still_in_list);
+ if (!kref_get_unless_zero(&obj->base.refcount))
+ continue;
+
+ /*
+ * Note: Someone else might be migrating the object at this
+ * point. The object's region is not stable until we lock
+ * the object.
+ */
+ mutex_unlock(&mr->objects.lock);
+ apply->ww = &ww;
+ for_i915_gem_ww(&ww, ret, apply->interruptible) {
+ ret = i915_gem_object_lock(obj, apply->ww);
+ if (ret)
+ continue;
+
+ if (obj->mm.region == mr)
+ ret = ops->process_obj(apply, obj);
+ /* Implicit object unlock */
+ }
+
+ i915_gem_object_put(obj);
+ mutex_lock(&mr->objects.lock);
+ if (ret)
+ break;
+ }
+ list_splice_tail(&still_in_list, &mr->objects.list);
+ mutex_unlock(&mr->objects.lock);
+
+ return ret;
+}
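A hypothetical usage sketch for i915_gem_process_region(): the embedding struct, callback and ops table below are invented for illustration and do not exist in this patch, but they follow the embedding pattern the apply-to-region structures suggest.

struct region_counter {
	struct i915_gem_apply_to_region base;
	unsigned long count;
};

static int count_one_obj(struct i915_gem_apply_to_region *apply,
			 struct drm_i915_gem_object *obj)
{
	struct region_counter *rc =
		container_of(apply, struct region_counter, base);

	rc->count++;	/* the object is locked here; safe to inspect */
	return 0;
}

static const struct i915_gem_apply_to_region_ops count_ops = {
	.process_obj = count_one_obj,
};

static unsigned long example_count_region(struct intel_memory_region *mr)
{
	struct region_counter rc = {
		.base = { .ops = &count_ops, .interruptible = true },
	};

	if (i915_gem_process_region(mr, &rc.base))
		return 0;

	return rc.count;
}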
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.h b/drivers/gpu/drm/i915/gem/i915_gem_region.h
new file mode 100644
index 0000000000..8a7650b27c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_region.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_REGION_H__
+#define __I915_GEM_REGION_H__
+
+#include <linux/types.h>
+
+struct intel_memory_region;
+struct drm_i915_gem_object;
+struct sg_table;
+
+struct i915_gem_apply_to_region;
+
+#define I915_BO_INVALID_OFFSET ((resource_size_t)-1)
+
+/**
+ * struct i915_gem_apply_to_region_ops - ops to use when iterating over all
+ * region objects.
+ */
+struct i915_gem_apply_to_region_ops {
+ /**
+ * @process_obj: Process the current object
+ *
+ * Note that if this function is part of a ww transaction, and
+ * if it returns -EDEADLK for one of the objects, it may be
+ * rerun for that same object in the same pass.
+ */
+ int (*process_obj)(struct i915_gem_apply_to_region *apply,
+ struct drm_i915_gem_object *obj);
+};
+
+/**
+ * struct i915_gem_apply_to_region - Argument to the struct
+ * i915_gem_apply_to_region_ops functions.
+ * @ops: The ops for the operation.
+ * @ww: Locking context used for the transaction.
+ * @interruptible: Whether to perform object locking interruptible.
+ *
+ * This structure is intended to be embedded in a private struct if needed
+ */
+struct i915_gem_apply_to_region {
+ const struct i915_gem_apply_to_region_ops *ops;
+ struct i915_gem_ww_ctx *ww;
+ u32 interruptible:1;
+};
+
+void i915_gem_object_init_memory_region(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mem);
+void i915_gem_object_release_memory_region(struct drm_i915_gem_object *obj);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_region(struct intel_memory_region *mem,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags);
+struct drm_i915_gem_object *
+i915_gem_object_create_region_at(struct intel_memory_region *mem,
+ resource_size_t offset,
+ resource_size_t size,
+ unsigned int flags);
+
+int i915_gem_process_region(struct intel_memory_region *mr,
+ struct i915_gem_apply_to_region *apply);
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
new file mode 100644
index 0000000000..73a4a4eb29
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
@@ -0,0 +1,738 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/pagevec.h>
+#include <linux/shmem_fs.h>
+#include <linux/swap.h>
+
+#include <drm/drm_cache.h>
+
+#include "gem/i915_gem_region.h"
+#include "i915_drv.h"
+#include "i915_gem_object.h"
+#include "i915_gem_tiling.h"
+#include "i915_gemfs.h"
+#include "i915_scatterlist.h"
+#include "i915_trace.h"
+
+/*
+ * Move folios to appropriate lru and release the batch, decrementing the
+ * ref count of those folios.
+ */
+static void check_release_folio_batch(struct folio_batch *fbatch)
+{
+ check_move_unevictable_folios(fbatch);
+ __folio_batch_release(fbatch);
+ cond_resched();
+}
+
+void shmem_sg_free_table(struct sg_table *st, struct address_space *mapping,
+ bool dirty, bool backup)
+{
+ struct sgt_iter sgt_iter;
+ struct folio_batch fbatch;
+ struct folio *last = NULL;
+ struct page *page;
+
+ mapping_clear_unevictable(mapping);
+
+ folio_batch_init(&fbatch);
+ for_each_sgt_page(page, sgt_iter, st) {
+ struct folio *folio = page_folio(page);
+
+ if (folio == last)
+ continue;
+ last = folio;
+ if (dirty)
+ folio_mark_dirty(folio);
+ if (backup)
+ folio_mark_accessed(folio);
+
+ if (!folio_batch_add(&fbatch, folio))
+ check_release_folio_batch(&fbatch);
+ }
+ if (fbatch.nr)
+ check_release_folio_batch(&fbatch);
+
+ sg_free_table(st);
+}
+
+int shmem_sg_alloc_table(struct drm_i915_private *i915, struct sg_table *st,
+ size_t size, struct intel_memory_region *mr,
+ struct address_space *mapping,
+ unsigned int max_segment)
+{
+ unsigned int page_count; /* restricted by sg_alloc_table */
+ unsigned long i;
+ struct scatterlist *sg;
+ unsigned long next_pfn = 0; /* suppress gcc warning */
+ gfp_t noreclaim;
+ int ret;
+
+ if (overflows_type(size / PAGE_SIZE, page_count))
+ return -E2BIG;
+
+ page_count = size / PAGE_SIZE;
+ /*
+ * If there's no chance of allocating enough pages for the whole
+ * object, bail early.
+ */
+ if (size > resource_size(&mr->region))
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, page_count, GFP_KERNEL | __GFP_NOWARN))
+ return -ENOMEM;
+
+ /*
+ * Get the list of pages out of our struct file. They'll be pinned
+ * at this point until we release them.
+ *
+ * Fail silently without starting the shrinker
+ */
+ mapping_set_unevictable(mapping);
+ noreclaim = mapping_gfp_constraint(mapping, ~__GFP_RECLAIM);
+ noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
+
+ sg = st->sgl;
+ st->nents = 0;
+ for (i = 0; i < page_count; i++) {
+ struct folio *folio;
+ unsigned long nr_pages;
+ const unsigned int shrink[] = {
+ I915_SHRINK_BOUND | I915_SHRINK_UNBOUND,
+ 0,
+ }, *s = shrink;
+ gfp_t gfp = noreclaim;
+
+ do {
+ cond_resched();
+ folio = shmem_read_folio_gfp(mapping, i, gfp);
+ if (!IS_ERR(folio))
+ break;
+
+ if (!*s) {
+ ret = PTR_ERR(folio);
+ goto err_sg;
+ }
+
+ i915_gem_shrink(NULL, i915, 2 * page_count, NULL, *s++);
+
+ /*
+ * We've tried hard to allocate the memory by reaping
+ * our own buffers; now let the real VM do its job and
+ * go down in flames if truly OOM.
+ *
+ * However, since graphics tend to be disposable,
+ * defer the oom here by reporting the ENOMEM back
+ * to userspace.
+ */
+ if (!*s) {
+ /* reclaim and warn, but no oom */
+ gfp = mapping_gfp_mask(mapping);
+
+ /*
+ * Our bo are always dirty and so we require
+ * kswapd to reclaim our pages (direct reclaim
+ * does not effectively begin pageout of our
+ * buffers on its own). However, direct reclaim
+ * only waits for kswapd when under allocation
+ * congestion. So as a result __GFP_RECLAIM is
+ * unreliable and fails to actually reclaim our
+ * dirty pages -- unless you try over and over
+ * again with !__GFP_NORETRY. However, we still
+ * want to fail this allocation rather than
+ * trigger the out-of-memory killer and for
+ * this we want __GFP_RETRY_MAYFAIL.
+ */
+ gfp |= __GFP_RETRY_MAYFAIL | __GFP_NOWARN;
+ }
+ } while (1);
+
+ nr_pages = min_t(unsigned long,
+ folio_nr_pages(folio), page_count - i);
+ if (!i ||
+ sg->length >= max_segment ||
+ folio_pfn(folio) != next_pfn) {
+ if (i)
+ sg = sg_next(sg);
+
+ st->nents++;
+ sg_set_folio(sg, folio, nr_pages * PAGE_SIZE, 0);
+ } else {
+ /* XXX: could overflow? */
+ sg->length += nr_pages * PAGE_SIZE;
+ }
+ next_pfn = folio_pfn(folio) + nr_pages;
+ i += nr_pages - 1;
+
+ /* Check that the i965g/gm workaround works. */
+ GEM_BUG_ON(gfp & __GFP_DMA32 && next_pfn >= 0x00100000UL);
+ }
+ if (sg) /* loop terminated early; short sg table */
+ sg_mark_end(sg);
+
+ /* Trim unused sg entries to avoid wasting memory. */
+ i915_sg_trim(st);
+
+ return 0;
+err_sg:
+ sg_mark_end(sg);
+ if (sg != st->sgl) {
+ shmem_sg_free_table(st, mapping, false, false);
+ } else {
+ mapping_clear_unevictable(mapping);
+ sg_free_table(st);
+ }
+
+ /*
+ * shmemfs first checks if there is enough memory to allocate the page
+ * and reports ENOSPC should there be insufficient memory, along with the usual
+ * ENOMEM for a genuine allocation failure.
+ *
+ * We use ENOSPC in our driver to mean that we have run out of aperture
+ * space and so want to translate the error from shmemfs back to our
+ * usual understanding of ENOMEM.
+ */
+ if (ret == -ENOSPC)
+ ret = -ENOMEM;
+
+ return ret;
+}
+
+static int shmem_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_memory_region *mem = obj->mm.region;
+ struct address_space *mapping = obj->base.filp->f_mapping;
+ unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
+ struct sg_table *st;
+ struct sgt_iter sgt_iter;
+ struct page *page;
+ int ret;
+
+ /*
+ * Assert that the object is not currently in any GPU domain. As it
+ * wasn't in the GTT, there shouldn't be any way it could have been in
+ * a GPU cache
+ */
+ GEM_BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
+ GEM_BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
+
+rebuild_st:
+ st = kmalloc(sizeof(*st), GFP_KERNEL | __GFP_NOWARN);
+ if (!st)
+ return -ENOMEM;
+
+ ret = shmem_sg_alloc_table(i915, st, obj->base.size, mem, mapping,
+ max_segment);
+ if (ret)
+ goto err_st;
+
+ ret = i915_gem_gtt_prepare_pages(obj, st);
+ if (ret) {
+ /*
+ * DMA remapping failed? One possible cause is that
+ * it could not reserve enough large entries, asking
+ * for PAGE_SIZE chunks instead may be helpful.
+ */
+ if (max_segment > PAGE_SIZE) {
+ for_each_sgt_page(page, sgt_iter, st)
+ put_page(page);
+ sg_free_table(st);
+ kfree(st);
+
+ max_segment = PAGE_SIZE;
+ goto rebuild_st;
+ } else {
+ dev_warn(i915->drm.dev,
+ "Failed to DMA remap %zu pages\n",
+ obj->base.size >> PAGE_SHIFT);
+ goto err_pages;
+ }
+ }
+
+ if (i915_gem_object_needs_bit17_swizzle(obj))
+ i915_gem_object_do_bit_17_swizzle(obj, st);
+
+ if (i915_gem_object_can_bypass_llc(obj))
+ obj->cache_dirty = true;
+
+ __i915_gem_object_set_pages(obj, st);
+
+ return 0;
+
+err_pages:
+ shmem_sg_free_table(st, mapping, false, false);
+ /*
+ * shmemfs first checks if there is enough memory to allocate the page
+ * and reports ENOSPC should there be insufficient memory, along with the usual
+ * ENOMEM for a genuine allocation failure.
+ *
+ * We use ENOSPC in our driver to mean that we have run out of aperture
+ * space and so want to translate the error from shmemfs back to our
+ * usual understanding of ENOMEM.
+ */
+err_st:
+ if (ret == -ENOSPC)
+ ret = -ENOMEM;
+
+ kfree(st);
+
+ return ret;
+}
+
+static int
+shmem_truncate(struct drm_i915_gem_object *obj)
+{
+ /*
+ * Our goal here is to return as much of the memory as
+ * is possible back to the system as we are called from OOM.
+ * To do this we must instruct the shmfs to drop all of its
+ * backing pages, *now*.
+ */
+ shmem_truncate_range(file_inode(obj->base.filp), 0, (loff_t)-1);
+ obj->mm.madv = __I915_MADV_PURGED;
+ obj->mm.pages = ERR_PTR(-EFAULT);
+
+ return 0;
+}
+
+void __shmem_writeback(size_t size, struct address_space *mapping)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_NONE,
+ .nr_to_write = SWAP_CLUSTER_MAX,
+ .range_start = 0,
+ .range_end = LLONG_MAX,
+ .for_reclaim = 1,
+ };
+ unsigned long i;
+
+ /*
+ * Leave mmappings intact (GTT will have been revoked on unbinding,
+ * leaving only CPU mmappings around) and add those pages to the LRU
+ * instead of invoking writeback so they are aged and paged out
+ * as normal.
+ */
+
+ /* Begin writeback on each dirty page */
+ for (i = 0; i < size >> PAGE_SHIFT; i++) {
+ struct page *page;
+
+ page = find_lock_page(mapping, i);
+ if (!page)
+ continue;
+
+ if (!page_mapped(page) && clear_page_dirty_for_io(page)) {
+ int ret;
+
+ SetPageReclaim(page);
+ ret = mapping->a_ops->writepage(page, &wbc);
+ if (!PageWriteback(page))
+ ClearPageReclaim(page);
+ if (!ret)
+ goto put;
+ }
+ unlock_page(page);
+put:
+ put_page(page);
+ }
+}
+
+static void
+shmem_writeback(struct drm_i915_gem_object *obj)
+{
+ __shmem_writeback(obj->base.size, obj->base.filp->f_mapping);
+}
+
+static int shmem_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
+{
+ switch (obj->mm.madv) {
+ case I915_MADV_DONTNEED:
+ return i915_gem_object_truncate(obj);
+ case __I915_MADV_PURGED:
+ return 0;
+ }
+
+ if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK)
+ shmem_writeback(obj);
+
+ return 0;
+}
+
+void
+__i915_gem_object_release_shmem(struct drm_i915_gem_object *obj,
+ struct sg_table *pages,
+ bool needs_clflush)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ GEM_BUG_ON(obj->mm.madv == __I915_MADV_PURGED);
+
+ if (obj->mm.madv == I915_MADV_DONTNEED)
+ obj->mm.dirty = false;
+
+ if (needs_clflush &&
+ (obj->read_domains & I915_GEM_DOMAIN_CPU) == 0 &&
+ !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
+ drm_clflush_sg(pages);
+
+ __start_cpu_write(obj);
+ /*
+ * On non-LLC igfx platforms, force the flush-on-acquire if this is ever
+ * swapped-in. Our async flush path is not trustworthy enough yet (and
+ * happens in the wrong order), and with some tricks it's conceivable
+ * for userspace to change the cache-level to I915_CACHE_NONE after the
+ * pages are swapped-in, and since execbuf binds the object before doing
+ * the async flush, we have a race window.
+ */
+ if (!HAS_LLC(i915) && !IS_DGFX(i915))
+ obj->cache_dirty = true;
+}
+
+void i915_gem_object_put_pages_shmem(struct drm_i915_gem_object *obj, struct sg_table *pages)
+{
+ __i915_gem_object_release_shmem(obj, pages, true);
+
+ i915_gem_gtt_finish_pages(obj, pages);
+
+ if (i915_gem_object_needs_bit17_swizzle(obj))
+ i915_gem_object_save_bit_17_swizzle(obj, pages);
+
+ shmem_sg_free_table(pages, file_inode(obj->base.filp)->i_mapping,
+ obj->mm.dirty, obj->mm.madv == I915_MADV_WILLNEED);
+ kfree(pages);
+ obj->mm.dirty = false;
+}
+
+static void
+shmem_put_pages(struct drm_i915_gem_object *obj, struct sg_table *pages)
+{
+ if (likely(i915_gem_object_has_struct_page(obj)))
+ i915_gem_object_put_pages_shmem(obj, pages);
+ else
+ i915_gem_object_put_pages_phys(obj, pages);
+}
+
+static int
+shmem_pwrite(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *arg)
+{
+ struct address_space *mapping = obj->base.filp->f_mapping;
+ const struct address_space_operations *aops = mapping->a_ops;
+ char __user *user_data = u64_to_user_ptr(arg->data_ptr);
+ u64 remain, offset;
+ unsigned int pg;
+
+ /* Caller already validated user args */
+ GEM_BUG_ON(!access_ok(user_data, arg->size));
+
+ if (!i915_gem_object_has_struct_page(obj))
+ return i915_gem_object_pwrite_phys(obj, arg);
+
+ /*
+ * Before we instantiate/pin the backing store for our use, we
+ * can prepopulate the shmemfs filp efficiently using a write into
+ * the pagecache. We avoid the penalty of instantiating all the
+ * pages, important if the user is just writing to a few and never
+ * uses the object on the GPU, and using a direct write into shmemfs
+ * allows it to avoid the cost of retrieving a page (either swapin
+ * or clearing-before-use) before it is overwritten.
+ */
+ if (i915_gem_object_has_pages(obj))
+ return -ENODEV;
+
+ if (obj->mm.madv != I915_MADV_WILLNEED)
+ return -EFAULT;
+
+ /*
+ * Before the pages are instantiated the object is treated as being
+ * in the CPU domain. The pages will be clflushed as required before
+ * use, and we can freely write into the pages directly. If userspace
+ * races pwrite with any other operation, corruption will ensue -
+ * that is userspace's prerogative!
+ */
+
+ remain = arg->size;
+ offset = arg->offset;
+ pg = offset_in_page(offset);
+
+ do {
+ unsigned int len, unwritten;
+ struct page *page;
+ void *data, *vaddr;
+ int err;
+ char __maybe_unused c;
+
+ len = PAGE_SIZE - pg;
+ if (len > remain)
+ len = remain;
+
+ /* Prefault the user page to reduce potential recursion */
+ err = __get_user(c, user_data);
+ if (err)
+ return err;
+
+ err = __get_user(c, user_data + len - 1);
+ if (err)
+ return err;
+
+ err = aops->write_begin(obj->base.filp, mapping, offset, len,
+ &page, &data);
+ if (err < 0)
+ return err;
+
+ vaddr = kmap_atomic(page);
+ unwritten = __copy_from_user_inatomic(vaddr + pg,
+ user_data,
+ len);
+ kunmap_atomic(vaddr);
+
+ err = aops->write_end(obj->base.filp, mapping, offset, len,
+ len - unwritten, page, data);
+ if (err < 0)
+ return err;
+
+ /* We don't handle -EFAULT, leave it to the caller to check */
+ if (unwritten)
+ return -ENODEV;
+
+ remain -= len;
+ user_data += len;
+ offset += len;
+ pg = 0;
+ } while (remain);
+
+ return 0;
+}
+
+static int
+shmem_pread(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *arg)
+{
+ if (!i915_gem_object_has_struct_page(obj))
+ return i915_gem_object_pread_phys(obj, arg);
+
+ return -ENODEV;
+}
+
+static void shmem_release(struct drm_i915_gem_object *obj)
+{
+ if (i915_gem_object_has_struct_page(obj))
+ i915_gem_object_release_memory_region(obj);
+
+ fput(obj->base.filp);
+}
+
+const struct drm_i915_gem_object_ops i915_gem_shmem_ops = {
+ .name = "i915_gem_object_shmem",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+
+ .get_pages = shmem_get_pages,
+ .put_pages = shmem_put_pages,
+ .truncate = shmem_truncate,
+ .shrink = shmem_shrink,
+
+ .pwrite = shmem_pwrite,
+ .pread = shmem_pread,
+
+ .release = shmem_release,
+};
+
+static int __create_shmem(struct drm_i915_private *i915,
+ struct drm_gem_object *obj,
+ resource_size_t size)
+{
+ unsigned long flags = VM_NORESERVE;
+ struct file *filp;
+
+ drm_gem_private_object_init(&i915->drm, obj, size);
+
+ /* XXX: The __shmem_file_setup() function returns -EINVAL if size is
+ * greater than MAX_LFS_FILESIZE.
+ * To handle the same error as other code that returns -E2BIG when
+ * the size is too large, we add code that returns -E2BIG when the
+ * size is larger than what can be handled.
+ * If BITS_PER_LONG is 32, size > MAX_LFS_FILESIZE is always false,
+ * so we only need to check when BITS_PER_LONG is 64.
+ * If BITS_PER_LONG is 32, E2BIG checks are processed when
+ * i915_gem_object_size_2big() is called before init_object() callback
+ * is called.
+ */
+ if (BITS_PER_LONG == 64 && size > MAX_LFS_FILESIZE)
+ return -E2BIG;
+
+ if (i915->mm.gemfs)
+ filp = shmem_file_setup_with_mnt(i915->mm.gemfs, "i915", size,
+ flags);
+ else
+ filp = shmem_file_setup("i915", size, flags);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ obj->filp = filp;
+ return 0;
+}
+
+static int shmem_object_init(struct intel_memory_region *mem,
+ struct drm_i915_gem_object *obj,
+ resource_size_t offset,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
+{
+ static struct lock_class_key lock_class;
+ struct drm_i915_private *i915 = mem->i915;
+ struct address_space *mapping;
+ unsigned int cache_level;
+ gfp_t mask;
+ int ret;
+
+ ret = __create_shmem(i915, &obj->base, size);
+ if (ret)
+ return ret;
+
+ mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+ if (IS_I965GM(i915) || IS_I965G(i915)) {
+ /* 965gm cannot relocate objects above 4GiB. */
+ mask &= ~__GFP_HIGHMEM;
+ mask |= __GFP_DMA32;
+ }
+
+ mapping = obj->base.filp->f_mapping;
+ mapping_set_gfp_mask(mapping, mask);
+ GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+ i915_gem_object_init(obj, &i915_gem_shmem_ops, &lock_class, flags);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+
+ /*
+ * MTL doesn't snoop CPU cache by default for GPU access (namely
+ * 1-way coherency). However, some UMDs currently depend on
+ * that. Make 1-way coherent the default setting for MTL. A follow-up
+ * patch will extend the GEM_CREATE uAPI to allow UMDs to specify the
+ * caching mode at BO creation time.
+ */
+ if (HAS_LLC(i915) || (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)))
+ /* On some devices, we can have the GPU use the LLC (the CPU
+ * cache) for about a 10% performance improvement
+ * compared to uncached. Graphics requests other than
+ * display scanout are coherent with the CPU in
+ * accessing this cache. This means in this mode we
+ * don't need to clflush on the CPU side, and on the
+ * GPU side we only need to flush internal caches to
+ * get data visible to the CPU.
+ *
+ * However, we maintain the display planes as UC, and so
+ * need to rebind when first used as such.
+ */
+ cache_level = I915_CACHE_LLC;
+ else
+ cache_level = I915_CACHE_NONE;
+
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+
+ i915_gem_object_init_memory_region(obj, mem);
+
+ return 0;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem(struct drm_i915_private *i915,
+ resource_size_t size)
+{
+ return i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
+ size, 0, 0);
+}
+
+/* Allocate a new GEM object and fill it with the supplied data */
+struct drm_i915_gem_object *
+i915_gem_object_create_shmem_from_data(struct drm_i915_private *dev_priv,
+ const void *data, resource_size_t size)
+{
+ struct drm_i915_gem_object *obj;
+ struct file *file;
+ const struct address_space_operations *aops;
+ resource_size_t offset;
+ int err;
+
+ GEM_WARN_ON(IS_DGFX(dev_priv));
+ obj = i915_gem_object_create_shmem(dev_priv, round_up(size, PAGE_SIZE));
+ if (IS_ERR(obj))
+ return obj;
+
+ GEM_BUG_ON(obj->write_domain != I915_GEM_DOMAIN_CPU);
+
+ file = obj->base.filp;
+ aops = file->f_mapping->a_ops;
+ offset = 0;
+ do {
+ unsigned int len = min_t(typeof(size), size, PAGE_SIZE);
+ struct page *page;
+ void *pgdata, *vaddr;
+
+ err = aops->write_begin(file, file->f_mapping, offset, len,
+ &page, &pgdata);
+ if (err < 0)
+ goto fail;
+
+ vaddr = kmap(page);
+ memcpy(vaddr, data, len);
+ kunmap(page);
+
+ err = aops->write_end(file, file->f_mapping, offset, len, len,
+ page, pgdata);
+ if (err < 0)
+ goto fail;
+
+ size -= len;
+ data += len;
+ offset += len;
+ } while (size);
+
+ return obj;
+
+fail:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+static int init_shmem(struct intel_memory_region *mem)
+{
+ i915_gemfs_init(mem->i915);
+ intel_memory_region_set_name(mem, "system");
+
+ return 0; /* We have fallback to the kernel mnt if gemfs init failed. */
+}
+
+static int release_shmem(struct intel_memory_region *mem)
+{
+ i915_gemfs_fini(mem->i915);
+ return 0;
+}
+
+static const struct intel_memory_region_ops shmem_region_ops = {
+ .init = init_shmem,
+ .release = release_shmem,
+ .init_object = shmem_object_init,
+};
+
+struct intel_memory_region *i915_gem_shmem_setup(struct drm_i915_private *i915,
+ u16 type, u16 instance)
+{
+ return intel_memory_region_create(i915, 0,
+ totalram_pages() << PAGE_SHIFT,
+ PAGE_SIZE, 0, 0,
+ type, instance,
+ &shmem_region_ops);
+}
+
+bool i915_gem_object_is_shmem(const struct drm_i915_gem_object *obj)
+{
+ return obj->ops == &i915_gem_shmem_ops;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
new file mode 100644
index 0000000000..214763942a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
@@ -0,0 +1,585 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2015 Intel Corporation
+ */
+
+#include <linux/oom.h>
+#include <linux/sched/mm.h>
+#include <linux/shmem_fs.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/pci.h>
+#include <linux/dma-buf.h>
+#include <linux/vmalloc.h>
+
+#include "gt/intel_gt_requests.h"
+
+#include "i915_trace.h"
+
+static bool swap_available(void)
+{
+ return get_nr_swap_pages() > 0;
+}
+
+static bool can_release_pages(struct drm_i915_gem_object *obj)
+{
+ /* Consider only shrinkable objects. */
+ if (!i915_gem_object_is_shrinkable(obj))
+ return false;
+
+ /*
+ * We can only return physical pages to the system if we can either
+ * discard the contents (because the user has marked them as being
+ * purgeable) or if we can move their contents out to swap.
+ */
+ return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
+}
+
+static bool drop_pages(struct drm_i915_gem_object *obj,
+ unsigned long shrink, bool trylock_vm)
+{
+ unsigned long flags;
+
+ flags = 0;
+ if (shrink & I915_SHRINK_ACTIVE)
+ flags |= I915_GEM_OBJECT_UNBIND_ACTIVE;
+ if (!(shrink & I915_SHRINK_BOUND))
+ flags |= I915_GEM_OBJECT_UNBIND_TEST;
+ if (trylock_vm)
+ flags |= I915_GEM_OBJECT_UNBIND_VM_TRYLOCK;
+
+ if (i915_gem_object_unbind(obj, flags) == 0)
+ return true;
+
+ return false;
+}
+
+static int try_to_writeback(struct drm_i915_gem_object *obj, unsigned int flags)
+{
+ if (obj->ops->shrink) {
+ unsigned int shrink_flags = 0;
+
+ if (!(flags & I915_SHRINK_ACTIVE))
+ shrink_flags |= I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT;
+
+ if (flags & I915_SHRINK_WRITEBACK)
+ shrink_flags |= I915_GEM_OBJECT_SHRINK_WRITEBACK;
+
+ return obj->ops->shrink(obj, shrink_flags);
+ }
+
+ return 0;
+}
+
+/**
+ * i915_gem_shrink - Shrink buffer object caches
+ * @ww: i915 gem ww acquire ctx, or NULL
+ * @i915: i915 device
+ * @target: amount of memory to make available, in pages
+ * @nr_scanned: optional output for number of pages scanned (incremental)
+ * @shrink: control flags for selecting cache types
+ *
+ * This function is the main interface to the shrinker. It will try to release
+ * up to @target pages of main memory backing storage from buffer objects.
+ * Selection of the specific caches can be done with @shrink. This is e.g. useful
+ * when purgeable objects should be removed from caches preferentially.
+ *
+ * Note that it's not guaranteed that released amount is actually available as
+ * free system memory - the pages might still be in use due to other reasons
+ * (like cpu mmaps) or the mm core has reused them before we could grab them.
+ * Therefore code that needs to explicitly shrink buffer objects caches (e.g. to
+ * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
+ *
+ * Also note that any kind of pinning (both per-vma address space pins and
+ * backing storage pins at the buffer object level) result in the shrinker code
+ * having to skip the object.
+ *
+ * Returns:
+ * The number of pages of backing storage actually released.
+ */
+unsigned long
+i915_gem_shrink(struct i915_gem_ww_ctx *ww,
+ struct drm_i915_private *i915,
+ unsigned long target,
+ unsigned long *nr_scanned,
+ unsigned int shrink)
+{
+ const struct {
+ struct list_head *list;
+ unsigned int bit;
+ } phases[] = {
+ { &i915->mm.purge_list, ~0u },
+ {
+ &i915->mm.shrink_list,
+ I915_SHRINK_BOUND | I915_SHRINK_UNBOUND
+ },
+ { NULL, 0 },
+ }, *phase;
+ intel_wakeref_t wakeref = 0;
+ unsigned long count = 0;
+ unsigned long scanned = 0;
+ int err = 0;
+
+ /* CHV + VTD workaround use stop_machine(); need to trylock vm->mutex */
+ bool trylock_vm = !ww && intel_vm_no_concurrent_access_wa(i915);
+
+ trace_i915_gem_shrink(i915, target, shrink);
+
+ /*
+ * Unbinding of objects will require HW access; let us not wake the
+ * device just to recover a little memory. If absolutely necessary,
+ * we will force the wake during oom-notifier.
+ */
+ if (shrink & I915_SHRINK_BOUND) {
+ wakeref = intel_runtime_pm_get_if_in_use(&i915->runtime_pm);
+ if (!wakeref)
+ shrink &= ~I915_SHRINK_BOUND;
+ }
+
+ /*
+ * When shrinking the active list, we should also consider active
+ * contexts. Active contexts are pinned until they are retired, and
+ * so can not be simply unbound to retire and unpin their pages. To
+ * shrink the contexts, we must wait until the gpu is idle and
+ * completed its switch to the kernel context. In short, we do
+ * not have a good mechanism for idling a specific context, but
+ * what we can do is give them a kick so that we do not keep idle
+ * contexts around longer than is necessary.
+ */
+ if (shrink & I915_SHRINK_ACTIVE)
+ /* Retire requests to unpin all idle contexts */
+ intel_gt_retire_requests(to_gt(i915));
+
+ /*
+ * As we may completely rewrite the (un)bound list whilst unbinding
+ * (due to retiring requests) we have to strictly process only
+ * one element of the list at the time, and recheck the list
+ * on every iteration.
+ *
+ * In particular, we must hold a reference whilst removing the
+ * object as we may end up waiting for and/or retiring the objects.
+ * This might release the final reference (held by the active list)
+ * and result in the object being freed from under us. This is
+ * similar to the precautions the eviction code must take whilst
+ * removing objects.
+ *
+ * Also note that although these lists do not hold a reference to
+ * the object we can safely grab one here: The final object
+ * unreferencing and the bound_list are both protected by the
+ * dev->struct_mutex and so we won't ever be able to observe an
+ * object on the bound_list with a reference count of 0.
+ */
+ for (phase = phases; phase->list; phase++) {
+ struct list_head still_in_list;
+ struct drm_i915_gem_object *obj;
+ unsigned long flags;
+
+ if ((shrink & phase->bit) == 0)
+ continue;
+
+ INIT_LIST_HEAD(&still_in_list);
+
+ /*
+ * We serialize our access to unreferenced objects through
+ * the use of the struct_mutex. While the objects are not
+ * yet freed (due to RCU then a workqueue) we still want
+ * to be able to shrink their pages, so they remain on
+ * the unbound/bound list until actually freed.
+ */
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ while (count < target &&
+ (obj = list_first_entry_or_null(phase->list,
+ typeof(*obj),
+ mm.link))) {
+ list_move_tail(&obj->mm.link, &still_in_list);
+
+ if (shrink & I915_SHRINK_VMAPS &&
+ !is_vmalloc_addr(obj->mm.mapping))
+ continue;
+
+ if (!(shrink & I915_SHRINK_ACTIVE) &&
+ i915_gem_object_is_framebuffer(obj))
+ continue;
+
+ if (!can_release_pages(obj))
+ continue;
+
+ if (!kref_get_unless_zero(&obj->base.refcount))
+ continue;
+
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+
+ /* May arrive from get_pages on another bo */
+ if (!ww) {
+ if (!i915_gem_object_trylock(obj, NULL))
+ goto skip;
+ } else {
+ err = i915_gem_object_lock(obj, ww);
+ if (err)
+ goto skip;
+ }
+
+ if (drop_pages(obj, shrink, trylock_vm) &&
+ !__i915_gem_object_put_pages(obj) &&
+ !try_to_writeback(obj, shrink))
+ count += obj->base.size >> PAGE_SHIFT;
+
+ if (!ww)
+ i915_gem_object_unlock(obj);
+
+ scanned += obj->base.size >> PAGE_SHIFT;
+skip:
+ i915_gem_object_put(obj);
+
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ if (err)
+ break;
+ }
+ list_splice_tail(&still_in_list, phase->list);
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+ if (err)
+ break;
+ }
+
+ if (shrink & I915_SHRINK_BOUND)
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+
+ if (err)
+ return err;
+
+ if (nr_scanned)
+ *nr_scanned += scanned;
+ return count;
+}
+
+/**
+ * i915_gem_shrink_all - Shrink buffer object caches completely
+ * @i915: i915 device
+ *
+ * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
+ * caches completely. It also first waits for and retires all outstanding
+ * requests to also be able to release backing storage for active objects.
+ *
+ * This should only be used in code to intentionally quiesce the gpu or as a
+ * last-ditch effort when memory seems to have run out.
+ *
+ * Returns:
+ * The number of pages of backing storage actually released.
+ */
+unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
+{
+ intel_wakeref_t wakeref;
+ unsigned long freed = 0;
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ freed = i915_gem_shrink(NULL, i915, -1UL, NULL,
+ I915_SHRINK_BOUND |
+ I915_SHRINK_UNBOUND);
+ }
+
+ return freed;
+}
+
+static unsigned long
+i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
+{
+ struct drm_i915_private *i915 =
+ container_of(shrinker, struct drm_i915_private, mm.shrinker);
+ unsigned long num_objects;
+ unsigned long count;
+
+ count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT;
+ num_objects = READ_ONCE(i915->mm.shrink_count);
+
+ /*
+ * Update our preferred vmscan batch size for the next pass.
+ * Our rough guess for an effective batch size is roughly 2
+ * available GEM objects' worth of pages. That is, we don't want
+ * the shrinker to fire until it is worth the cost of freeing an
+ * entire GEM object.
+ */
+ if (num_objects) {
+ unsigned long avg = 2 * count / num_objects;
+
+ i915->mm.shrinker.batch =
+ max((i915->mm.shrinker.batch + avg) >> 1,
+ 128ul /* default SHRINK_BATCH */);
+ }
+
+ return count;
+}
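As a hypothetical worked example of the batch heuristic above (the numbers are invented): with count = 4096 reclaimable pages spread over num_objects = 16 objects, avg = 2 * 4096 / 16 = 512; starting from the initial batch of 4096 set at shrinker registration, the new batch becomes max((4096 + 512) >> 1, 128) = 2304, so vmscan will subsequently invoke the scan callback in chunks of roughly 2304 pages.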
+
+static unsigned long
+i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
+{
+ struct drm_i915_private *i915 =
+ container_of(shrinker, struct drm_i915_private, mm.shrinker);
+ unsigned long freed;
+
+ sc->nr_scanned = 0;
+
+ freed = i915_gem_shrink(NULL, i915,
+ sc->nr_to_scan,
+ &sc->nr_scanned,
+ I915_SHRINK_BOUND |
+ I915_SHRINK_UNBOUND);
+ if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref) {
+ freed += i915_gem_shrink(NULL, i915,
+ sc->nr_to_scan - sc->nr_scanned,
+ &sc->nr_scanned,
+ I915_SHRINK_ACTIVE |
+ I915_SHRINK_BOUND |
+ I915_SHRINK_UNBOUND |
+ I915_SHRINK_WRITEBACK);
+ }
+ }
+
+ return sc->nr_scanned ? freed : SHRINK_STOP;
+}
+
+static int
+i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct drm_i915_private *i915 =
+ container_of(nb, struct drm_i915_private, mm.oom_notifier);
+ struct drm_i915_gem_object *obj;
+ unsigned long unevictable, available, freed_pages;
+ intel_wakeref_t wakeref;
+ unsigned long flags;
+
+ freed_pages = 0;
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL,
+ I915_SHRINK_BOUND |
+ I915_SHRINK_UNBOUND |
+ I915_SHRINK_WRITEBACK);
+
+ /* Because we may be allocating inside our own driver, we cannot
+ * assert that there are no objects with pinned pages that are not
+ * being pointed to by hardware.
+ */
+ available = unevictable = 0;
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ list_for_each_entry(obj, &i915->mm.shrink_list, mm.link) {
+ if (!can_release_pages(obj))
+ unevictable += obj->base.size >> PAGE_SHIFT;
+ else
+ available += obj->base.size >> PAGE_SHIFT;
+ }
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+
+ if (freed_pages || available)
+ pr_info("Purging GPU memory, %lu pages freed, "
+ "%lu pages still pinned, %lu pages left available.\n",
+ freed_pages, unevictable, available);
+
+ *(unsigned long *)ptr += freed_pages;
+ return NOTIFY_DONE;
+}
+
+static int
+i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
+{
+ struct drm_i915_private *i915 =
+ container_of(nb, struct drm_i915_private, mm.vmap_notifier);
+ struct i915_vma *vma, *next;
+ unsigned long freed_pages = 0;
+ intel_wakeref_t wakeref;
+
+ with_intel_runtime_pm(&i915->runtime_pm, wakeref)
+ freed_pages += i915_gem_shrink(NULL, i915, -1UL, NULL,
+ I915_SHRINK_BOUND |
+ I915_SHRINK_UNBOUND |
+ I915_SHRINK_VMAPS);
+
+ /* We also want to clear any cached iomaps as they wrap vmap */
+ mutex_lock(&to_gt(i915)->ggtt->vm.mutex);
+ list_for_each_entry_safe(vma, next,
+ &to_gt(i915)->ggtt->vm.bound_list, vm_link) {
+ unsigned long count = i915_vma_size(vma) >> PAGE_SHIFT;
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ if (!vma->iomap || i915_vma_is_active(vma))
+ continue;
+
+ if (!i915_gem_object_trylock(obj, NULL))
+ continue;
+
+ if (__i915_vma_unbind(vma) == 0)
+ freed_pages += count;
+
+ i915_gem_object_unlock(obj);
+ }
+ mutex_unlock(&to_gt(i915)->ggtt->vm.mutex);
+
+ *(unsigned long *)ptr += freed_pages;
+ return NOTIFY_DONE;
+}
+
+void i915_gem_driver_register__shrinker(struct drm_i915_private *i915)
+{
+ i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
+ i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
+ i915->mm.shrinker.seeks = DEFAULT_SEEKS;
+ i915->mm.shrinker.batch = 4096;
+ drm_WARN_ON(&i915->drm, register_shrinker(&i915->mm.shrinker,
+ "drm-i915_gem"));
+
+ i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
+ drm_WARN_ON(&i915->drm, register_oom_notifier(&i915->mm.oom_notifier));
+
+ i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
+ drm_WARN_ON(&i915->drm,
+ register_vmap_purge_notifier(&i915->mm.vmap_notifier));
+}
+
+void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915)
+{
+ drm_WARN_ON(&i915->drm,
+ unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
+ drm_WARN_ON(&i915->drm,
+ unregister_oom_notifier(&i915->mm.oom_notifier));
+ unregister_shrinker(&i915->mm.shrinker);
+}
+
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+ struct mutex *mutex)
+{
+ if (!IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+
+ fs_reclaim_acquire(GFP_KERNEL);
+
+ mutex_acquire(&mutex->dep_map, 0, 0, _RET_IP_);
+ mutex_release(&mutex->dep_map, _RET_IP_);
+
+ fs_reclaim_release(GFP_KERNEL);
+}
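A hypothetical sketch of how i915_gem_shrinker_taints_mutex() is meant to be used (the cache structure and its lock are invented): priming the lock once at init time records a reclaim -> mutex dependency, so lockdep will later warn if a reclaim-capable allocation (e.g. GFP_KERNEL) is ever attempted while that mutex is held.

struct example_cache {
	struct mutex lock;	/* may also be taken from the shrinker */
};

static void example_cache_init(struct drm_i915_private *i915,
			       struct example_cache *cache)
{
	mutex_init(&cache->lock);

	/* Pretend the lock nests inside fs_reclaim so lockdep tracks it */
	i915_gem_shrinker_taints_mutex(i915, &cache->lock);
}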
+
+/**
+ * i915_gem_object_make_unshrinkable - Hide the object from the shrinker. By
+ * default all object types that support shrinking (see IS_SHRINKABLE) will also
+ * make the object visible to the shrinker after allocating the system memory
+ * pages.
+ * @obj: The GEM object.
+ *
+ * This is typically used for special kernel internal objects that can't be
+ * easily processed by the shrinker, like if they are perma-pinned.
+ */
+void i915_gem_object_make_unshrinkable(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = obj_to_i915(obj);
+ unsigned long flags;
+
+ /*
+ * We can only be called while the pages are pinned or when
+ * the pages are released. If pinned, we should only be called
+ * from a single caller under controlled conditions; and on release
+ * only one caller may release us. Neither of the two may cross.
+ */
+ if (atomic_add_unless(&obj->mm.shrink_pin, 1, 0))
+ return;
+
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ if (!atomic_fetch_inc(&obj->mm.shrink_pin) &&
+ !list_empty(&obj->mm.link)) {
+ list_del_init(&obj->mm.link);
+ i915->mm.shrink_count--;
+ i915->mm.shrink_memory -= obj->base.size;
+ }
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+}
+
+static void ___i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj,
+ struct list_head *head)
+{
+ struct drm_i915_private *i915 = obj_to_i915(obj);
+ unsigned long flags;
+
+ if (!i915_gem_object_is_shrinkable(obj))
+ return;
+
+ if (atomic_add_unless(&obj->mm.shrink_pin, -1, 1))
+ return;
+
+ spin_lock_irqsave(&i915->mm.obj_lock, flags);
+ GEM_BUG_ON(!kref_read(&obj->base.refcount));
+ if (atomic_dec_and_test(&obj->mm.shrink_pin)) {
+ GEM_BUG_ON(!list_empty(&obj->mm.link));
+
+ list_add_tail(&obj->mm.link, head);
+ i915->mm.shrink_count++;
+ i915->mm.shrink_memory += obj->base.size;
+
+ }
+ spin_unlock_irqrestore(&i915->mm.obj_lock, flags);
+}
+
+/**
+ * __i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+ ___i915_gem_object_make_shrinkable(obj,
+ &obj_to_i915(obj)->mm.shrink_list);
+}
+
+/**
+ * __i915_gem_object_make_purgeable - Move the object to the tail of the
+ * purgeable list. Objects on this list might be swapped out. Used with
+ * DONTNEED objects.
+ * @obj: The GEM object.
+ *
+ * DO NOT USE. This is intended to be called on very special objects that don't
+ * yet have mm.pages, but are guaranteed to have potentially reclaimable pages
+ * underneath.
+ */
+void __i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
+{
+ ___i915_gem_object_make_shrinkable(obj,
+ &obj_to_i915(obj)->mm.purge_list);
+}
+
+/**
+ * i915_gem_object_make_shrinkable - Move the object to the tail of the
+ * shrinkable list. Objects on this list might be swapped out. Used with
+ * WILLNEED objects.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
+void i915_gem_object_make_shrinkable(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ __i915_gem_object_make_shrinkable(obj);
+}
+
+/**
+ * i915_gem_object_make_purgeable - Move the object to the tail of the purgeable
+ * list. Used with DONTNEED objects. Unlike with shrinkable objects, the
+ * shrinker will attempt to discard the backing pages, instead of trying to swap
+ * them out.
+ * @obj: The GEM object.
+ *
+ * MUST only be called on objects which have backing pages.
+ *
+ * MUST be balanced with previous call to i915_gem_object_make_unshrinkable().
+ */
+void i915_gem_object_make_purgeable(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+ __i915_gem_object_make_purgeable(obj);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h
new file mode 100644
index 0000000000..8512470f6f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_SHRINKER_H__
+#define __I915_GEM_SHRINKER_H__
+
+#include <linux/bits.h>
+
+struct drm_i915_private;
+struct i915_gem_ww_ctx;
+struct mutex;
+
+/* i915_gem_shrinker.c */
+unsigned long i915_gem_shrink(struct i915_gem_ww_ctx *ww,
+ struct drm_i915_private *i915,
+ unsigned long target,
+ unsigned long *nr_scanned,
+ unsigned flags);
+#define I915_SHRINK_UNBOUND BIT(0)
+#define I915_SHRINK_BOUND BIT(1)
+#define I915_SHRINK_ACTIVE BIT(2)
+#define I915_SHRINK_VMAPS BIT(3)
+#define I915_SHRINK_WRITEBACK BIT(4)
+
+unsigned long i915_gem_shrink_all(struct drm_i915_private *i915);
+void i915_gem_driver_register__shrinker(struct drm_i915_private *i915);
+void i915_gem_driver_unregister__shrinker(struct drm_i915_private *i915);
+void i915_gem_shrinker_taints_mutex(struct drm_i915_private *i915,
+ struct mutex *mutex);
+
+#endif /* __I915_GEM_SHRINKER_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
new file mode 100644
index 0000000000..1a766d8e7c
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -0,0 +1,1012 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008-2012 Intel Corporation
+ */
+
+#include <linux/errno.h>
+#include <linux/mutex.h>
+
+#include <drm/drm_mm.h>
+#include <drm/i915_drm.h>
+
+#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_region.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_mcr.h"
+#include "gt/intel_gt_regs.h"
+#include "gt/intel_region_lmem.h"
+#include "i915_drv.h"
+#include "i915_gem_stolen.h"
+#include "i915_pci.h"
+#include "i915_reg.h"
+#include "i915_utils.h"
+#include "i915_vgpu.h"
+#include "intel_mchbar_regs.h"
+#include "intel_pci_config.h"
+
+/*
+ * The BIOS typically reserves some of the system's memory for the exclusive
+ * use of the integrated graphics. This memory is no longer available for
+ * use by the OS and so the user finds that his system has less memory
+ * available than he put in. We refer to this memory as stolen.
+ *
+ * The BIOS will allocate its framebuffer from the stolen memory. Our
+ * goal is try to reuse that object for our own fbcon which must always
+ * be available for panics. Anything else we can reuse the stolen memory
+ * for is a boon.
+ */
+
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *i915,
+ struct drm_mm_node *node, u64 size,
+ unsigned alignment, u64 start, u64 end)
+{
+ int ret;
+
+ if (!drm_mm_initialized(&i915->mm.stolen))
+ return -ENODEV;
+
+ /* WaSkipStolenMemoryFirstPage:bdw+ */
+ if (GRAPHICS_VER(i915) >= 8 && start < 4096)
+ start = 4096;
+
+ mutex_lock(&i915->mm.stolen_lock);
+ ret = drm_mm_insert_node_in_range(&i915->mm.stolen, node,
+ size, alignment, 0,
+ start, end, DRM_MM_INSERT_BEST);
+ mutex_unlock(&i915->mm.stolen_lock);
+
+ return ret;
+}
+
+int i915_gem_stolen_insert_node(struct drm_i915_private *i915,
+ struct drm_mm_node *node, u64 size,
+ unsigned alignment)
+{
+ return i915_gem_stolen_insert_node_in_range(i915, node,
+ size, alignment,
+ I915_GEM_STOLEN_BIAS,
+ U64_MAX);
+}
+
+void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
+ struct drm_mm_node *node)
+{
+ mutex_lock(&i915->mm.stolen_lock);
+ drm_mm_remove_node(node);
+ mutex_unlock(&i915->mm.stolen_lock);
+}
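+
+/*
+ * Illustrative sketch (not compiled): how a caller typically pairs the two
+ * helpers above. A drm_mm_node is carved out of the stolen allocator, used
+ * via node.start/node.size, and handed back with the matching remove call.
+ * The function name below is hypothetical and exists only in this sketch.
+ */
+#if 0
+static int example_stolen_carveout(struct drm_i915_private *i915)
+{
+	struct drm_mm_node node = {};
+	int err;
+
+	/* 64 KiB, 4 KiB aligned, anywhere above I915_GEM_STOLEN_BIAS */
+	err = i915_gem_stolen_insert_node(i915, &node, 64 * 1024, 4096);
+	if (err)
+		return err;
+
+	/* ... program hardware with node.start / node.size ... */
+
+	i915_gem_stolen_remove_node(i915, &node);
+	return 0;
+}
+#endif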
+
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm)
+{
+ return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start;
+}
+
+static int adjust_stolen(struct drm_i915_private *i915,
+ struct resource *dsm)
+{
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+ struct intel_uncore *uncore = ggtt->vm.gt->uncore;
+
+ if (!valid_stolen_size(i915, dsm))
+ return -EINVAL;
+
+ /*
+ * Make sure we don't clobber the GTT if it's within stolen memory
+ *
+	 * TODO: We have yet to encounter the case where the GTT wasn't at the
+ * end of stolen. With that assumption we could simplify this.
+ */
+ if (GRAPHICS_VER(i915) <= 4 &&
+ !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
+ struct resource stolen[2] = {*dsm, *dsm};
+ struct resource ggtt_res;
+ resource_size_t ggtt_start;
+
+ ggtt_start = intel_uncore_read(uncore, PGTBL_CTL);
+ if (GRAPHICS_VER(i915) == 4)
+ ggtt_start = (ggtt_start & PGTBL_ADDRESS_LO_MASK) |
+ (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28;
+ else
+ ggtt_start &= PGTBL_ADDRESS_LO_MASK;
+
+ ggtt_res = DEFINE_RES_MEM(ggtt_start, ggtt_total_entries(ggtt) * 4);
+
+ if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end)
+ stolen[0].end = ggtt_res.start;
+ if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end)
+ stolen[1].start = ggtt_res.end;
+
+ /* Pick the larger of the two chunks */
+ if (resource_size(&stolen[0]) > resource_size(&stolen[1]))
+ *dsm = stolen[0];
+ else
+ *dsm = stolen[1];
+
+ if (stolen[0].start != stolen[1].start ||
+ stolen[0].end != stolen[1].end) {
+ drm_dbg(&i915->drm,
+ "GTT within stolen memory at %pR\n",
+ &ggtt_res);
+ drm_dbg(&i915->drm, "Stolen memory adjusted to %pR\n",
+ dsm);
+ }
+ }
+
+ if (!valid_stolen_size(i915, dsm))
+ return -EINVAL;
+
+ return 0;
+}
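+
+/*
+ * Note on adjust_stolen() above: the two candidate chunks describe the stolen
+ * space below and above the GTT respectively. If, for example, the GTT sits
+ * at the very top of the stolen range, the upper chunk collapses to nothing
+ * and the larger lower chunk is what survives in *dsm.
+ */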
+
+static int request_smem_stolen(struct drm_i915_private *i915,
+ struct resource *dsm)
+{
+ struct resource *r;
+
+ /*
+ * With stolen lmem, we don't need to request system memory for the
+ * address range since it's local to the gpu.
+ *
+	 * Starting with MTL, on IGFX devices the stolen memory is exposed via
+	 * LMEMBAR and shall be considered similar to stolen lmem.
+ */
+ if (HAS_LMEM(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915))
+ return 0;
+
+ /*
+ * Verify that nothing else uses this physical address. Stolen
+ * memory should be reserved by the BIOS and hidden from the
+ * kernel. So if the region is already marked as busy, something
+ * is seriously wrong.
+ */
+ r = devm_request_mem_region(i915->drm.dev, dsm->start,
+ resource_size(dsm),
+ "Graphics Stolen Memory");
+ if (r == NULL) {
+ /*
+ * One more attempt but this time requesting region from
+ * start + 1, as we have seen that this resolves the region
+ * conflict with the PCI Bus.
+ * This is a BIOS w/a: Some BIOS wrap stolen in the root
+ * PCI bus, but have an off-by-one error. Hence retry the
+ * reservation starting from 1 instead of 0.
+ * There's also BIOS with off-by-one on the other end.
+ */
+ r = devm_request_mem_region(i915->drm.dev, dsm->start + 1,
+ resource_size(dsm) - 2,
+ "Graphics Stolen Memory");
+ /*
+ * GEN3 firmware likes to smash pci bridges into the stolen
+ * range. Apparently this works.
+ */
+ if (!r && GRAPHICS_VER(i915) != 3) {
+ drm_err(&i915->drm,
+ "conflict detected with stolen region: %pR\n",
+ dsm);
+
+ return -EBUSY;
+ }
+ }
+
+ return 0;
+}
+
+static void i915_gem_cleanup_stolen(struct drm_i915_private *i915)
+{
+ if (!drm_mm_initialized(&i915->mm.stolen))
+ return;
+
+ drm_mm_takedown(&i915->mm.stolen);
+}
+
+static void g4x_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
+ resource_size_t *base,
+ resource_size_t *size)
+{
+ u32 reg_val = intel_uncore_read(uncore,
+ IS_GM45(i915) ?
+ CTG_STOLEN_RESERVED :
+ ELK_STOLEN_RESERVED);
+ resource_size_t stolen_top = i915->dsm.stolen.end + 1;
+
+ drm_dbg(&i915->drm, "%s_STOLEN_RESERVED = %08x\n",
+ IS_GM45(i915) ? "CTG" : "ELK", reg_val);
+
+ if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0)
+ return;
+
+ /*
+ * Whether ILK really reuses the ELK register for this is unclear.
+ * Let's see if we catch anyone with this supposedly enabled on ILK.
+ */
+ drm_WARN(&i915->drm, GRAPHICS_VER(i915) == 5,
+ "ILK stolen reserved found? 0x%08x\n",
+ reg_val);
+
+ if (!(reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK))
+ return;
+
+ *base = (reg_val & G4X_STOLEN_RESERVED_ADDR2_MASK) << 16;
+ drm_WARN_ON(&i915->drm,
+ (reg_val & G4X_STOLEN_RESERVED_ADDR1_MASK) < *base);
+
+ *size = stolen_top - *base;
+}
+
+static void gen6_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
+ resource_size_t *base,
+ resource_size_t *size)
+{
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+
+ drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+ if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+ return;
+
+ *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
+
+ switch (reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK) {
+ case GEN6_STOLEN_RESERVED_1M:
+ *size = 1024 * 1024;
+ break;
+ case GEN6_STOLEN_RESERVED_512K:
+ *size = 512 * 1024;
+ break;
+ case GEN6_STOLEN_RESERVED_256K:
+ *size = 256 * 1024;
+ break;
+ case GEN6_STOLEN_RESERVED_128K:
+ *size = 128 * 1024;
+ break;
+ default:
+ *size = 1024 * 1024;
+ MISSING_CASE(reg_val & GEN6_STOLEN_RESERVED_SIZE_MASK);
+ }
+}
+
+static void vlv_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
+ resource_size_t *base,
+ resource_size_t *size)
+{
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+ resource_size_t stolen_top = i915->dsm.stolen.end + 1;
+
+ drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+ if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+ return;
+
+ switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) {
+ default:
+ MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK);
+ fallthrough;
+ case GEN7_STOLEN_RESERVED_1M:
+ *size = 1024 * 1024;
+ break;
+ }
+
+ /*
+ * On vlv, the ADDR_MASK portion is left as 0 and HW deduces the
+ * reserved location as (top - size).
+ */
+ *base = stolen_top - *size;
+}
+
+static void gen7_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
+ resource_size_t *base,
+ resource_size_t *size)
+{
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+
+ drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+ if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+ return;
+
+ *base = reg_val & GEN7_STOLEN_RESERVED_ADDR_MASK;
+
+ switch (reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK) {
+ case GEN7_STOLEN_RESERVED_1M:
+ *size = 1024 * 1024;
+ break;
+ case GEN7_STOLEN_RESERVED_256K:
+ *size = 256 * 1024;
+ break;
+ default:
+ *size = 1024 * 1024;
+ MISSING_CASE(reg_val & GEN7_STOLEN_RESERVED_SIZE_MASK);
+ }
+}
+
+static void chv_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
+ resource_size_t *base,
+ resource_size_t *size)
+{
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+
+ drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+ if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+ return;
+
+ *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
+
+ switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
+ case GEN8_STOLEN_RESERVED_1M:
+ *size = 1024 * 1024;
+ break;
+ case GEN8_STOLEN_RESERVED_2M:
+ *size = 2 * 1024 * 1024;
+ break;
+ case GEN8_STOLEN_RESERVED_4M:
+ *size = 4 * 1024 * 1024;
+ break;
+ case GEN8_STOLEN_RESERVED_8M:
+ *size = 8 * 1024 * 1024;
+ break;
+ default:
+ *size = 8 * 1024 * 1024;
+ MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
+ }
+}
+
+static void bdw_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
+ resource_size_t *base,
+ resource_size_t *size)
+{
+ u32 reg_val = intel_uncore_read(uncore, GEN6_STOLEN_RESERVED);
+ resource_size_t stolen_top = i915->dsm.stolen.end + 1;
+
+ drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = %08x\n", reg_val);
+
+ if (!(reg_val & GEN6_STOLEN_RESERVED_ENABLE))
+ return;
+
+ if (!(reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK))
+ return;
+
+ *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK;
+ *size = stolen_top - *base;
+}
+
+static void icl_get_stolen_reserved(struct drm_i915_private *i915,
+ struct intel_uncore *uncore,
+ resource_size_t *base,
+ resource_size_t *size)
+{
+ u64 reg_val = intel_uncore_read64(uncore, GEN6_STOLEN_RESERVED);
+
+ drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
+
+ switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
+ case GEN8_STOLEN_RESERVED_1M:
+ *size = 1024 * 1024;
+ break;
+ case GEN8_STOLEN_RESERVED_2M:
+ *size = 2 * 1024 * 1024;
+ break;
+ case GEN8_STOLEN_RESERVED_4M:
+ *size = 4 * 1024 * 1024;
+ break;
+ case GEN8_STOLEN_RESERVED_8M:
+ *size = 8 * 1024 * 1024;
+ break;
+ default:
+ *size = 8 * 1024 * 1024;
+ MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
+ }
+
+ if (HAS_LMEMBAR_SMEM_STOLEN(i915))
+ /* the base is initialized to stolen top so subtract size to get base */
+ *base -= *size;
+ else
+ *base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
+}
+
+/*
+ * Initialize i915->dsm.reserved to contain the reserved space within the Data
+ * Stolen Memory. This is a range at the top of DSM that is reserved, not to
+ * be used by the driver, so it must be excluded from the region passed to the
+ * allocator later. In the spec this is also referred to as WOPCM.
+ *
+ * Our expectation is that the reserved space is at the top of the stolen
+ * region, as it has been the case for every platform, and *never* at the
+ * bottom, so the calculation here can be simplified.
+ */
+static int init_reserved_stolen(struct drm_i915_private *i915)
+{
+ struct intel_uncore *uncore = &i915->uncore;
+ resource_size_t reserved_base, stolen_top;
+ resource_size_t reserved_size;
+ int ret = 0;
+
+ stolen_top = i915->dsm.stolen.end + 1;
+ reserved_base = stolen_top;
+ reserved_size = 0;
+
+ if (GRAPHICS_VER(i915) >= 11) {
+ icl_get_stolen_reserved(i915, uncore,
+ &reserved_base, &reserved_size);
+ } else if (GRAPHICS_VER(i915) >= 8) {
+ if (IS_LP(i915))
+ chv_get_stolen_reserved(i915, uncore,
+ &reserved_base, &reserved_size);
+ else
+ bdw_get_stolen_reserved(i915, uncore,
+ &reserved_base, &reserved_size);
+ } else if (GRAPHICS_VER(i915) >= 7) {
+ if (IS_VALLEYVIEW(i915))
+ vlv_get_stolen_reserved(i915, uncore,
+ &reserved_base, &reserved_size);
+ else
+ gen7_get_stolen_reserved(i915, uncore,
+ &reserved_base, &reserved_size);
+ } else if (GRAPHICS_VER(i915) >= 6) {
+ gen6_get_stolen_reserved(i915, uncore,
+ &reserved_base, &reserved_size);
+ } else if (GRAPHICS_VER(i915) >= 5 || IS_G4X(i915)) {
+ g4x_get_stolen_reserved(i915, uncore,
+ &reserved_base, &reserved_size);
+ }
+
+ /* No reserved stolen */
+ if (reserved_base == stolen_top)
+ goto bail_out;
+
+ if (!reserved_base) {
+ drm_err(&i915->drm,
+ "inconsistent reservation %pa + %pa; ignoring\n",
+ &reserved_base, &reserved_size);
+ ret = -EINVAL;
+ goto bail_out;
+ }
+
+ i915->dsm.reserved = DEFINE_RES_MEM(reserved_base, reserved_size);
+
+ if (!resource_contains(&i915->dsm.stolen, &i915->dsm.reserved)) {
+ drm_err(&i915->drm,
+ "Stolen reserved area %pR outside stolen memory %pR\n",
+ &i915->dsm.reserved, &i915->dsm.stolen);
+ ret = -EINVAL;
+ goto bail_out;
+ }
+
+ return 0;
+
+bail_out:
+ i915->dsm.reserved = DEFINE_RES_MEM(reserved_base, 0);
+
+ return ret;
+}
+
+static int i915_gem_init_stolen(struct intel_memory_region *mem)
+{
+ struct drm_i915_private *i915 = mem->i915;
+
+ mutex_init(&i915->mm.stolen_lock);
+
+ if (intel_vgpu_active(i915)) {
+ drm_notice(&i915->drm,
+ "%s, disabling use of stolen memory\n",
+ "iGVT-g active");
+ return -ENOSPC;
+ }
+
+ if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
+ drm_notice(&i915->drm,
+ "%s, disabling use of stolen memory\n",
+ "DMAR active");
+ return -ENOSPC;
+ }
+
+ if (adjust_stolen(i915, &mem->region))
+ return -ENOSPC;
+
+ if (request_smem_stolen(i915, &mem->region))
+ return -ENOSPC;
+
+ i915->dsm.stolen = mem->region;
+
+ if (init_reserved_stolen(i915))
+ return -ENOSPC;
+
+ /* Exclude the reserved region from driver use */
+ mem->region.end = i915->dsm.reserved.start - 1;
+ mem->io_size = min(mem->io_size, resource_size(&mem->region));
+
+ i915->dsm.usable_size = resource_size(&mem->region);
+
+ drm_dbg(&i915->drm,
+ "Memory reserved for graphics device: %lluK, usable: %lluK\n",
+ (u64)resource_size(&i915->dsm.stolen) >> 10,
+ (u64)i915->dsm.usable_size >> 10);
+
+ if (i915->dsm.usable_size == 0)
+ return -ENOSPC;
+
+ /* Basic memrange allocator for stolen space. */
+ drm_mm_init(&i915->mm.stolen, 0, i915->dsm.usable_size);
+
+ /*
+ * Access to stolen lmem beyond certain size for MTL A0 stepping
+ * would crash the machine. Disable stolen lmem for userspace access
+ * by setting usable_size to zero.
+ */
+ if (IS_METEORLAKE(i915) && INTEL_REVID(i915) == 0x0)
+ i915->dsm.usable_size = 0;
+
+ return 0;
+}
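+
+/*
+ * Worked example for the two functions above: the reserved (WOPCM) range
+ * always hangs off the top of stolen, so with, say, a 64 MiB stolen region
+ * and an 8 MiB reservation, i915->dsm.reserved covers the last 8 MiB and
+ * mem->region.end is pulled back so that the drm_mm allocator only ever
+ * hands out the first 56 MiB.
+ */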
+
+static void dbg_poison(struct i915_ggtt *ggtt,
+ dma_addr_t addr, resource_size_t size,
+ u8 x)
+{
+#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
+ if (!drm_mm_node_allocated(&ggtt->error_capture))
+ return;
+
+ if (ggtt->vm.bind_async_flags & I915_VMA_GLOBAL_BIND)
+ return; /* beware stop_machine() inversion */
+
+ GEM_BUG_ON(!IS_ALIGNED(size, PAGE_SIZE));
+
+ mutex_lock(&ggtt->error_mutex);
+ while (size) {
+ void __iomem *s;
+
+ ggtt->vm.insert_page(&ggtt->vm, addr,
+ ggtt->error_capture.start,
+ i915_gem_get_pat_index(ggtt->vm.i915,
+ I915_CACHE_NONE),
+ 0);
+ mb();
+
+ s = io_mapping_map_wc(&ggtt->iomap,
+ ggtt->error_capture.start,
+ PAGE_SIZE);
+ memset_io(s, x, PAGE_SIZE);
+ io_mapping_unmap(s);
+
+ addr += PAGE_SIZE;
+ size -= PAGE_SIZE;
+ }
+ mb();
+ ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
+ mutex_unlock(&ggtt->error_mutex);
+#endif
+}
+
+static struct sg_table *
+i915_pages_create_for_stolen(struct drm_device *dev,
+ resource_size_t offset, resource_size_t size)
+{
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct sg_table *st;
+ struct scatterlist *sg;
+
+ GEM_BUG_ON(range_overflows(offset, size, resource_size(&i915->dsm.stolen)));
+
+ /* We hide that we have no struct page backing our stolen object
+ * by wrapping the contiguous physical allocation with a fake
+ * dma mapping in a single scatterlist.
+ */
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (st == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ if (sg_alloc_table(st, 1, GFP_KERNEL)) {
+ kfree(st);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ sg = st->sgl;
+ sg->offset = 0;
+ sg->length = size;
+
+ sg_dma_address(sg) = (dma_addr_t)i915->dsm.stolen.start + offset;
+ sg_dma_len(sg) = size;
+
+ return st;
+}
+
+static int i915_gem_object_get_pages_stolen(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct sg_table *pages =
+ i915_pages_create_for_stolen(obj->base.dev,
+ obj->stolen->start,
+ obj->stolen->size);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ dbg_poison(to_gt(i915)->ggtt,
+ sg_dma_address(pages->sgl),
+ sg_dma_len(pages->sgl),
+ POISON_INUSE);
+
+ __i915_gem_object_set_pages(obj, pages);
+
+ return 0;
+}
+
+static void i915_gem_object_put_pages_stolen(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ /* Should only be called from i915_gem_object_release_stolen() */
+
+ dbg_poison(to_gt(i915)->ggtt,
+ sg_dma_address(pages->sgl),
+ sg_dma_len(pages->sgl),
+ POISON_FREE);
+
+ sg_free_table(pages);
+ kfree(pages);
+}
+
+static void
+i915_gem_object_release_stolen(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen);
+
+ GEM_BUG_ON(!stolen);
+ i915_gem_stolen_remove_node(i915, stolen);
+ kfree(stolen);
+
+ i915_gem_object_release_memory_region(obj);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = {
+ .name = "i915_gem_object_stolen",
+ .get_pages = i915_gem_object_get_pages_stolen,
+ .put_pages = i915_gem_object_put_pages_stolen,
+ .release = i915_gem_object_release_stolen,
+};
+
+static int __i915_gem_object_create_stolen(struct intel_memory_region *mem,
+ struct drm_i915_gem_object *obj,
+ struct drm_mm_node *stolen)
+{
+ static struct lock_class_key lock_class;
+ unsigned int cache_level;
+ unsigned int flags;
+ int err;
+
+ /*
+ * Stolen objects are always physically contiguous since we just
+ * allocate one big block underneath using the drm_mm range allocator.
+ */
+ flags = I915_BO_ALLOC_CONTIGUOUS;
+
+ drm_gem_private_object_init(&mem->i915->drm, &obj->base, stolen->size);
+ i915_gem_object_init(obj, &i915_gem_object_stolen_ops, &lock_class, flags);
+
+ obj->stolen = stolen;
+ obj->read_domains = I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT;
+ cache_level = HAS_LLC(mem->i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+
+ if (WARN_ON(!i915_gem_object_trylock(obj, NULL)))
+ return -EBUSY;
+
+ i915_gem_object_init_memory_region(obj, mem);
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ i915_gem_object_release_memory_region(obj);
+ i915_gem_object_unlock(obj);
+
+ return err;
+}
+
+static int _i915_gem_object_stolen_init(struct intel_memory_region *mem,
+ struct drm_i915_gem_object *obj,
+ resource_size_t offset,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
+{
+ struct drm_i915_private *i915 = mem->i915;
+ struct drm_mm_node *stolen;
+ int ret;
+
+ if (!drm_mm_initialized(&i915->mm.stolen))
+ return -ENODEV;
+
+ if (size == 0)
+ return -EINVAL;
+
+ /*
+	 * With discrete devices, where we lack a mappable aperture, there is no
+	 * possible way to ever access this memory on the CPU side.
+ */
+ if (mem->type == INTEL_MEMORY_STOLEN_LOCAL && !mem->io_size &&
+ !(flags & I915_BO_ALLOC_GPU_ONLY))
+ return -ENOSPC;
+
+ stolen = kzalloc(sizeof(*stolen), GFP_KERNEL);
+ if (!stolen)
+ return -ENOMEM;
+
+ if (offset != I915_BO_INVALID_OFFSET) {
+ drm_dbg(&i915->drm,
+ "creating preallocated stolen object: stolen_offset=%pa, size=%pa\n",
+ &offset, &size);
+
+ stolen->start = offset;
+ stolen->size = size;
+ mutex_lock(&i915->mm.stolen_lock);
+ ret = drm_mm_reserve_node(&i915->mm.stolen, stolen);
+ mutex_unlock(&i915->mm.stolen_lock);
+ } else {
+ ret = i915_gem_stolen_insert_node(i915, stolen, size,
+ mem->min_page_size);
+ }
+ if (ret)
+ goto err_free;
+
+ ret = __i915_gem_object_create_stolen(mem, obj, stolen);
+ if (ret)
+ goto err_remove;
+
+ return 0;
+
+err_remove:
+ i915_gem_stolen_remove_node(i915, stolen);
+err_free:
+ kfree(stolen);
+ return ret;
+}
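+
+/*
+ * Note on _i915_gem_object_stolen_init() above: passing a fixed offset
+ * (anything other than I915_BO_INVALID_OFFSET) reserves that exact node with
+ * drm_mm_reserve_node(), which is how a preallocated range such as the
+ * firmware framebuffer mentioned at the top of this file can be wrapped in a
+ * GEM object; otherwise the node is placed by the stolen allocator using the
+ * region's minimum page size as alignment.
+ */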
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_i915_private *i915,
+ resource_size_t size)
+{
+ return i915_gem_object_create_region(i915->mm.stolen_region, size, 0, 0);
+}
+
+static int init_stolen_smem(struct intel_memory_region *mem)
+{
+ int err;
+
+ /*
+ * Initialise stolen early so that we may reserve preallocated
+ * objects for the BIOS to KMS transition.
+ */
+ err = i915_gem_init_stolen(mem);
+ if (err)
+ drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
+
+ return 0;
+}
+
+static int release_stolen_smem(struct intel_memory_region *mem)
+{
+ i915_gem_cleanup_stolen(mem->i915);
+ return 0;
+}
+
+static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
+ .init = init_stolen_smem,
+ .release = release_stolen_smem,
+ .init_object = _i915_gem_object_stolen_init,
+};
+
+static int init_stolen_lmem(struct intel_memory_region *mem)
+{
+ struct drm_i915_private *i915 = mem->i915;
+ int err;
+
+ if (GEM_WARN_ON(resource_size(&mem->region) == 0))
+ return 0;
+
+ err = i915_gem_init_stolen(mem);
+ if (err) {
+ drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
+ return 0;
+ }
+
+ if (mem->io_size &&
+ !io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size))
+ goto err_cleanup;
+
+ drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
+ &mem->io_start);
+ drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start);
+
+ return 0;
+
+err_cleanup:
+ i915_gem_cleanup_stolen(mem->i915);
+ return err;
+}
+
+static int release_stolen_lmem(struct intel_memory_region *mem)
+{
+ if (mem->io_size)
+ io_mapping_fini(&mem->iomap);
+ i915_gem_cleanup_stolen(mem->i915);
+ return 0;
+}
+
+static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
+ .init = init_stolen_lmem,
+ .release = release_stolen_lmem,
+ .init_object = _i915_gem_object_stolen_init,
+};
+
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+ u16 ggc, gms;
+
+ ggc = intel_uncore_read16(uncore, GGC);
+
+ /* check GGMS, should be fixed 0x3 (8MB) */
+ if ((ggc & GGMS_MASK) != GGMS_MASK)
+ return -EIO;
+
+ /* return valid GMS value, -EIO if invalid */
+ gms = REG_FIELD_GET(GMS_MASK, ggc);
+ switch (gms) {
+ case 0x0 ... 0x04:
+ return gms * 32;
+ case 0xf0 ... 0xfe:
+ return (gms - 0xf0 + 1) * 4;
+ default:
+ MISSING_CASE(gms);
+ return -EIO;
+ }
+}
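+
+/*
+ * Worked example for mtl_get_gms_size() above: a GMS field of 0x02 decodes
+ * to 2 * 32 = 64 MB of stolen, while the extended encodings 0xf0..0xfe map
+ * to 4..60 MB in 4 MB steps (0xf0 -> 4 MB, 0xf1 -> 8 MB, and so on).
+ */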
+
+struct intel_memory_region *
+i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
+ u16 instance)
+{
+ struct intel_uncore *uncore = &i915->uncore;
+ struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
+ resource_size_t dsm_size, dsm_base, lmem_size;
+ struct intel_memory_region *mem;
+ resource_size_t io_start, io_size;
+ resource_size_t min_page_size;
+ int ret;
+
+ if (WARN_ON_ONCE(instance))
+ return ERR_PTR(-ENODEV);
+
+ if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
+ return ERR_PTR(-ENXIO);
+
+ if (HAS_LMEMBAR_SMEM_STOLEN(i915) || IS_DG1(i915)) {
+ lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
+ } else {
+ resource_size_t lmem_range;
+
+ lmem_range = intel_gt_mcr_read_any(to_gt(i915), XEHP_TILE0_ADDR_RANGE) & 0xFFFF;
+ lmem_size = lmem_range >> XEHP_TILE_LMEM_RANGE_SHIFT;
+ lmem_size *= SZ_1G;
+ }
+
+ if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+ /*
+		 * The MTL DSM size is in the GGC register.
+		 * MTL also uses an offset from GSMBASE in its PTEs, so i915
+		 * uses dsm_base = 8MB to set up the stolen region, since
+		 * DSMBASE = GSMBASE + 8MB.
+ */
+ ret = mtl_get_gms_size(uncore);
+ if (ret < 0) {
+ drm_err(&i915->drm, "invalid MTL GGC register setting\n");
+ return ERR_PTR(ret);
+ }
+
+ dsm_base = SZ_8M;
+ dsm_size = (resource_size_t)(ret * SZ_1M);
+
+ GEM_BUG_ON(pci_resource_len(pdev, GEN12_LMEM_BAR) != SZ_256M);
+ GEM_BUG_ON((dsm_base + dsm_size) > lmem_size);
+ } else {
+ /* Use DSM base address instead for stolen memory */
+ dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
+ if (WARN_ON(lmem_size < dsm_base))
+ return ERR_PTR(-ENODEV);
+ dsm_size = ALIGN_DOWN(lmem_size - dsm_base, SZ_1M);
+ }
+
+ if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
+ io_start = 0;
+ io_size = 0;
+ } else {
+ io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
+ io_size = dsm_size;
+ }
+
+ min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
+ I915_GTT_PAGE_SIZE_4K;
+
+ mem = intel_memory_region_create(i915, dsm_base, dsm_size,
+ min_page_size,
+ io_start, io_size,
+ type, instance,
+ &i915_region_stolen_lmem_ops);
+ if (IS_ERR(mem))
+ return mem;
+
+ intel_memory_region_set_name(mem, "stolen-local");
+
+ mem->private = true;
+
+ return mem;
+}
+
+struct intel_memory_region*
+i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
+ u16 instance)
+{
+ struct intel_memory_region *mem;
+
+ mem = intel_memory_region_create(i915,
+ intel_graphics_stolen_res.start,
+ resource_size(&intel_graphics_stolen_res),
+ PAGE_SIZE, 0, 0, type, instance,
+ &i915_region_stolen_smem_ops);
+ if (IS_ERR(mem))
+ return mem;
+
+ intel_memory_region_set_name(mem, "stolen-system");
+
+ mem->private = true;
+
+ return mem;
+}
+
+bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj)
+{
+ return obj->ops == &i915_gem_object_stolen_ops;
+}
+
+bool i915_gem_stolen_initialized(const struct drm_i915_private *i915)
+{
+ return drm_mm_initialized(&i915->mm.stolen);
+}
+
+u64 i915_gem_stolen_area_address(const struct drm_i915_private *i915)
+{
+ return i915->dsm.stolen.start;
+}
+
+u64 i915_gem_stolen_area_size(const struct drm_i915_private *i915)
+{
+ return resource_size(&i915->dsm.stolen);
+}
+
+u64 i915_gem_stolen_node_address(const struct drm_i915_private *i915,
+ const struct drm_mm_node *node)
+{
+ return i915->dsm.stolen.start + i915_gem_stolen_node_offset(node);
+}
+
+bool i915_gem_stolen_node_allocated(const struct drm_mm_node *node)
+{
+ return drm_mm_node_allocated(node);
+}
+
+u64 i915_gem_stolen_node_offset(const struct drm_mm_node *node)
+{
+ return node->start;
+}
+
+u64 i915_gem_stolen_node_size(const struct drm_mm_node *node)
+{
+ return node->size;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.h b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
new file mode 100644
index 0000000000..258381d1c0
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#ifndef __I915_GEM_STOLEN_H__
+#define __I915_GEM_STOLEN_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+struct drm_mm_node;
+struct drm_i915_gem_object;
+
+#define i915_stolen_fb drm_mm_node
+
+int i915_gem_stolen_insert_node(struct drm_i915_private *dev_priv,
+ struct drm_mm_node *node, u64 size,
+ unsigned alignment);
+int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
+ struct drm_mm_node *node, u64 size,
+ unsigned alignment, u64 start,
+ u64 end);
+void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv,
+ struct drm_mm_node *node);
+struct intel_memory_region *
+i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
+ u16 instance);
+struct intel_memory_region *
+i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
+ u16 instance);
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_i915_private *dev_priv,
+ resource_size_t size);
+
+bool i915_gem_object_is_stolen(const struct drm_i915_gem_object *obj);
+
+#define I915_GEM_STOLEN_BIAS SZ_128K
+
+bool i915_gem_stolen_initialized(const struct drm_i915_private *i915);
+u64 i915_gem_stolen_area_address(const struct drm_i915_private *i915);
+u64 i915_gem_stolen_area_size(const struct drm_i915_private *i915);
+
+u64 i915_gem_stolen_node_address(const struct drm_i915_private *i915,
+ const struct drm_mm_node *node);
+
+bool i915_gem_stolen_node_allocated(const struct drm_mm_node *node);
+u64 i915_gem_stolen_node_offset(const struct drm_mm_node *node);
+u64 i915_gem_stolen_node_size(const struct drm_mm_node *node);
+
+#endif /* __I915_GEM_STOLEN_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_throttle.c b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
new file mode 100644
index 0000000000..af85d0c281
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_throttle.c
@@ -0,0 +1,102 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2014-2016 Intel Corporation
+ */
+
+#include <linux/jiffies.h>
+
+#include <drm/drm_file.h>
+
+#include "i915_drv.h"
+#include "i915_file_private.h"
+#include "i915_gem_context.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+/*
+ * 20ms is a fairly arbitrary limit (greater than the average frame time)
+ * chosen to prevent the CPU getting more than a frame ahead of the GPU
+ * (when using lax throttling for the frontbuffer). We also use it to
+ * offer free GPU waitboosts for severely congested workloads.
+ */
+#define DRM_I915_THROTTLE_JIFFIES msecs_to_jiffies(20)
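+/* At HZ=1000 the define above is 20 jiffies; at HZ=250 it is 5; at HZ=100 it is 2. */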
+
+/*
+ * Throttle our rendering by waiting until the ring has completed our requests
+ * emitted over 20 msec ago.
+ *
+ * Note that if we were to use the current jiffies each time around the loop,
+ * we wouldn't escape the function with any frames outstanding if the time to
+ * render a frame was over 20ms.
+ *
+ * This should get us reasonable parallelism between CPU and GPU but also
+ * relatively low latency when blocking on a particular request to finish.
+ */
+int
+i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ const unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
+ struct drm_i915_file_private *file_priv = file->driver_priv;
+ struct drm_i915_private *i915 = to_i915(dev);
+ struct i915_gem_context *ctx;
+ unsigned long idx;
+ long ret;
+
+ /* ABI: return -EIO if already wedged */
+ ret = intel_gt_terminally_wedged(to_gt(i915));
+ if (ret)
+ return ret;
+
+ rcu_read_lock();
+ xa_for_each(&file_priv->context_xa, idx, ctx) {
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+
+ if (!kref_get_unless_zero(&ctx->ref))
+ continue;
+ rcu_read_unlock();
+
+ for_each_gem_engine(ce,
+ i915_gem_context_lock_engines(ctx),
+ it) {
+ struct i915_request *rq, *target = NULL;
+
+ if (!ce->timeline)
+ continue;
+
+ mutex_lock(&ce->timeline->mutex);
+ list_for_each_entry_reverse(rq,
+ &ce->timeline->requests,
+ link) {
+ if (i915_request_completed(rq))
+ break;
+
+ if (time_after(rq->emitted_jiffies,
+ recent_enough))
+ continue;
+
+ target = i915_request_get(rq);
+ break;
+ }
+ mutex_unlock(&ce->timeline->mutex);
+ if (!target)
+ continue;
+
+ ret = i915_request_wait(target,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(target);
+ if (ret < 0)
+ break;
+ }
+ i915_gem_context_unlock_engines(ctx);
+ i915_gem_context_put(ctx);
+
+ rcu_read_lock();
+ }
+ rcu_read_unlock();
+
+ return ret < 0 ? ret : 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.c b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
new file mode 100644
index 0000000000..a049ca0b79
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.c
@@ -0,0 +1,472 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2008 Intel Corporation
+ */
+
+#include <linux/string.h>
+#include <linux/bitops.h>
+
+#include "i915_drv.h"
+#include "i915_gem.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_mman.h"
+#include "i915_gem_object.h"
+#include "i915_gem_tiling.h"
+#include "i915_reg.h"
+
+/**
+ * DOC: buffer object tiling
+ *
+ * i915_gem_set_tiling_ioctl() and i915_gem_get_tiling_ioctl() are the userspace
+ * interface to declare fence register requirements.
+ *
+ * In principle GEM doesn't care at all about the internal data layout of an
+ * object, and hence it also doesn't care about tiling or swizzling. There are two
+ * exceptions:
+ *
+ * - For X and Y tiling the hardware provides detilers for CPU access, so-called
+ *   fences. Since there's only a limited number of them, the kernel must manage
+ * these, and therefore userspace must tell the kernel the object tiling if it
+ * wants to use fences for detiling.
+ * - Gen3 and gen4 platforms have a swizzling pattern for tiled objects which
+ *   depends upon the physical page frame number. When swapping such objects the
+ *   page frame number might change and the kernel must be able to fix this up,
+ *   and hence must know the tiling. Note that on a subset of platforms with
+ * asymmetric memory channel population the swizzling pattern changes in an
+ * unknown way, and for those the kernel simply forbids swapping completely.
+ *
+ * Since neither of these applies to the new tiling layouts on modern platforms,
+ * like W, Ys and Yf tiling, GEM only allows object tiling to be set to X or Y tiled.
+ * Anything else can be handled entirely in userspace without the kernel's
+ * involvement.
+ */
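+
+/*
+ * Illustrative userspace sketch (not part of this file and not compiled
+ * here): the typical way the two ioctls documented above are driven through
+ * libdrm. Error handling is elided, and "fd" and "handle" are assumed to
+ * come from earlier device open / GEM create calls.
+ *
+ *	struct drm_i915_gem_set_tiling set = {
+ *		.handle = handle,
+ *		.tiling_mode = I915_TILING_X,
+ *		.stride = 4096,
+ *	};
+ *	drmIoctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set);
+ *
+ *	struct drm_i915_gem_get_tiling get = { .handle = handle };
+ *	drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_TILING, &get);
+ *
+ * After the calls, set.swizzle_mode and get.swizzle_mode report the bit 6
+ * swizzling the kernel selected for the object.
+ */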
+
+/**
+ * i915_gem_fence_size - required global GTT size for a fence
+ * @i915: i915 device
+ * @size: object size
+ * @tiling: tiling mode
+ * @stride: tiling stride
+ *
+ * Return the required global GTT size for a fence (view of a tiled object),
+ * taking into account potential fence register mapping.
+ */
+u32 i915_gem_fence_size(struct drm_i915_private *i915,
+ u32 size, unsigned int tiling, unsigned int stride)
+{
+ u32 ggtt_size;
+
+ GEM_BUG_ON(!size);
+
+ if (tiling == I915_TILING_NONE)
+ return size;
+
+ GEM_BUG_ON(!stride);
+
+ if (GRAPHICS_VER(i915) >= 4) {
+ stride *= i915_gem_tile_height(tiling);
+ GEM_BUG_ON(!IS_ALIGNED(stride, I965_FENCE_PAGE));
+ return roundup(size, stride);
+ }
+
+ /* Previous chips need a power-of-two fence region when tiling */
+ if (GRAPHICS_VER(i915) == 3)
+ ggtt_size = 1024*1024;
+ else
+ ggtt_size = 512*1024;
+
+ while (ggtt_size < size)
+ ggtt_size <<= 1;
+
+ return ggtt_size;
+}
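+
+/*
+ * Worked example for i915_gem_fence_size() above: on gen3 a 600 KiB tiled
+ * object needs a power-of-two fence region, so ggtt_size starts at 1 MiB and
+ * is already large enough; on gen2 it would start at 512 KiB and double once
+ * to reach 1 MiB. On gen4+ the size is simply rounded up to a whole number
+ * of tile rows (stride * tile height).
+ */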
+
+/**
+ * i915_gem_fence_alignment - required global GTT alignment for a fence
+ * @i915: i915 device
+ * @size: object size
+ * @tiling: tiling mode
+ * @stride: tiling stride
+ *
+ * Return the required global GTT alignment for a fence (a view of a tiled
+ * object), taking into account potential fence register mapping.
+ */
+u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
+ unsigned int tiling, unsigned int stride)
+{
+ GEM_BUG_ON(!size);
+
+ /*
+ * Minimum alignment is 4k (GTT page size), but might be greater
+ * if a fence register is needed for the object.
+ */
+ if (tiling == I915_TILING_NONE)
+ return I915_GTT_MIN_ALIGNMENT;
+
+ if (GRAPHICS_VER(i915) >= 4)
+ return I965_FENCE_PAGE;
+
+ /*
+ * Previous chips need to be aligned to the size of the smallest
+ * fence register that can contain the object.
+ */
+ return i915_gem_fence_size(i915, size, tiling, stride);
+}
+
+/* Check pitch constraints for all chips & tiling formats */
+static bool
+i915_tiling_ok(struct drm_i915_gem_object *obj,
+ unsigned int tiling, unsigned int stride)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ unsigned int tile_width;
+
+ /* Linear is always fine */
+ if (tiling == I915_TILING_NONE)
+ return true;
+
+ if (tiling > I915_TILING_LAST)
+ return false;
+
+ /* check maximum stride & object size */
+ /* i965+ stores the end address of the gtt mapping in the fence
+	 * reg, so don't bother to check the size */
+ if (GRAPHICS_VER(i915) >= 7) {
+ if (stride / 128 > GEN7_FENCE_MAX_PITCH_VAL)
+ return false;
+ } else if (GRAPHICS_VER(i915) >= 4) {
+ if (stride / 128 > I965_FENCE_MAX_PITCH_VAL)
+ return false;
+ } else {
+ if (stride > 8192)
+ return false;
+
+ if (!is_power_of_2(stride))
+ return false;
+ }
+
+ if (GRAPHICS_VER(i915) == 2 ||
+ (tiling == I915_TILING_Y && HAS_128_BYTE_Y_TILING(i915)))
+ tile_width = 128;
+ else
+ tile_width = 512;
+
+ if (!stride || !IS_ALIGNED(stride, tile_width))
+ return false;
+
+ return true;
+}
+
+static bool i915_vma_fence_prepare(struct i915_vma *vma,
+ int tiling_mode, unsigned int stride)
+{
+ struct drm_i915_private *i915 = vma->vm->i915;
+ u32 size, alignment;
+
+ if (!i915_vma_is_map_and_fenceable(vma))
+ return true;
+
+ size = i915_gem_fence_size(i915, vma->size, tiling_mode, stride);
+ if (i915_vma_size(vma) < size)
+ return false;
+
+ alignment = i915_gem_fence_alignment(i915, vma->size, tiling_mode, stride);
+ if (!IS_ALIGNED(i915_ggtt_offset(vma), alignment))
+ return false;
+
+ return true;
+}
+
+/* Make the current GTT allocation valid for the change in tiling. */
+static int
+i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
+ int tiling_mode, unsigned int stride)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+ struct i915_vma *vma, *vn;
+ LIST_HEAD(unbind);
+ int ret = 0;
+
+ if (tiling_mode == I915_TILING_NONE)
+ return 0;
+
+ mutex_lock(&ggtt->vm.mutex);
+
+ spin_lock(&obj->vma.lock);
+ for_each_ggtt_vma(vma, obj) {
+ GEM_BUG_ON(vma->vm != &ggtt->vm);
+
+ if (i915_vma_fence_prepare(vma, tiling_mode, stride))
+ continue;
+
+ list_move(&vma->vm_link, &unbind);
+ }
+ spin_unlock(&obj->vma.lock);
+
+ list_for_each_entry_safe(vma, vn, &unbind, vm_link) {
+ ret = __i915_vma_unbind(vma);
+ if (ret) {
+ /* Restore the remaining vma on an error */
+ list_splice(&unbind, &ggtt->vm.bound_list);
+ break;
+ }
+ }
+
+ mutex_unlock(&ggtt->vm.mutex);
+
+ return ret;
+}
+
+bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+
+ return to_gt(i915)->ggtt->bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
+ i915_gem_object_is_tiled(obj);
+}
+
+int
+i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
+ unsigned int tiling, unsigned int stride)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_vma *vma;
+ int err;
+
+ /* Make sure we don't cross-contaminate obj->tiling_and_stride */
+ BUILD_BUG_ON(I915_TILING_LAST & STRIDE_MASK);
+
+ GEM_BUG_ON(!i915_tiling_ok(obj, tiling, stride));
+ GEM_BUG_ON(!stride ^ (tiling == I915_TILING_NONE));
+
+ if ((tiling | stride) == obj->tiling_and_stride)
+ return 0;
+
+ if (i915_gem_object_is_framebuffer(obj))
+ return -EBUSY;
+
+ /* We need to rebind the object if its current allocation
+ * no longer meets the alignment restrictions for its new
+ * tiling mode. Otherwise we can just leave it alone, but
+ * need to ensure that any fence register is updated before
+ * the next fenced (either through the GTT or by the BLT unit
+ * on older GPUs) access.
+ *
+ * After updating the tiling parameters, we then flag whether
+ * we need to update an associated fence register. Note this
+ * has to also include the unfenced register the GPU uses
+ * whilst executing a fenced command for an untiled object.
+ */
+
+ i915_gem_object_lock(obj, NULL);
+ if (i915_gem_object_is_framebuffer(obj)) {
+ i915_gem_object_unlock(obj);
+ return -EBUSY;
+ }
+
+ err = i915_gem_object_fence_prepare(obj, tiling, stride);
+ if (err) {
+ i915_gem_object_unlock(obj);
+ return err;
+ }
+
+ /* If the memory has unknown (i.e. varying) swizzling, we pin the
+ * pages to prevent them being swapped out and causing corruption
+ * due to the change in swizzling.
+ */
+ if (i915_gem_object_has_pages(obj) &&
+ obj->mm.madv == I915_MADV_WILLNEED &&
+ i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES) {
+ if (tiling == I915_TILING_NONE) {
+ GEM_BUG_ON(!i915_gem_object_has_tiling_quirk(obj));
+ i915_gem_object_clear_tiling_quirk(obj);
+ i915_gem_object_make_shrinkable(obj);
+ }
+ if (!i915_gem_object_is_tiled(obj)) {
+ GEM_BUG_ON(i915_gem_object_has_tiling_quirk(obj));
+ i915_gem_object_make_unshrinkable(obj);
+ i915_gem_object_set_tiling_quirk(obj);
+ }
+ }
+
+ spin_lock(&obj->vma.lock);
+ for_each_ggtt_vma(vma, obj) {
+ vma->fence_size =
+ i915_gem_fence_size(i915, vma->size, tiling, stride);
+ vma->fence_alignment =
+ i915_gem_fence_alignment(i915,
+ vma->size, tiling, stride);
+
+ if (vma->fence)
+ vma->fence->dirty = true;
+ }
+ spin_unlock(&obj->vma.lock);
+
+ obj->tiling_and_stride = tiling | stride;
+
+ /* Try to preallocate memory required to save swizzling on put-pages */
+ if (i915_gem_object_needs_bit17_swizzle(obj)) {
+ if (!obj->bit_17) {
+ obj->bit_17 = bitmap_zalloc(obj->base.size >> PAGE_SHIFT,
+ GFP_KERNEL);
+ }
+ } else {
+ bitmap_free(obj->bit_17);
+ obj->bit_17 = NULL;
+ }
+
+ i915_gem_object_unlock(obj);
+
+ /* Force the fence to be reacquired for GTT access */
+ i915_gem_object_release_mmap_gtt(obj);
+
+ return 0;
+}
+
+/**
+ * i915_gem_set_tiling_ioctl - IOCTL handler to set tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
+ * Sets the tiling mode of an object, returning the required swizzling of
+ * bit 6 of addresses in the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int
+i915_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_gem_set_tiling *args = data;
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ if (!to_gt(dev_priv)->ggtt->num_fences)
+ return -EOPNOTSUPP;
+
+ obj = i915_gem_object_lookup(file, args->handle);
+ if (!obj)
+ return -ENOENT;
+
+ /*
+	 * The tiling mode of a proxy object is handled by its generator and is
+	 * not allowed to be changed by userspace.
+ */
+ if (i915_gem_object_is_proxy(obj)) {
+ err = -ENXIO;
+ goto err;
+ }
+
+ if (!i915_tiling_ok(obj, args->tiling_mode, args->stride)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ if (args->tiling_mode == I915_TILING_NONE) {
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+ args->stride = 0;
+ } else {
+ if (args->tiling_mode == I915_TILING_X)
+ args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
+ else
+ args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
+
+ /* Hide bit 17 swizzling from the user. This prevents old Mesa
+ * from aborting the application on sw fallbacks to bit 17,
+ * and we use the pread/pwrite bit17 paths to swizzle for it.
+ * If there was a user that was relying on the swizzle
+ * information for drm_intel_bo_map()ed reads/writes this would
+ * break it, but we don't have any of those.
+ */
+ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
+ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
+
+ /* If we can't handle the swizzling, make it untiled. */
+ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) {
+ args->tiling_mode = I915_TILING_NONE;
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+ args->stride = 0;
+ }
+ }
+
+ err = i915_gem_object_set_tiling(obj, args->tiling_mode, args->stride);
+
+ /* We have to maintain this existing ABI... */
+ args->stride = i915_gem_object_get_stride(obj);
+ args->tiling_mode = i915_gem_object_get_tiling(obj);
+
+err:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+/**
+ * i915_gem_get_tiling_ioctl - IOCTL handler to get tiling mode
+ * @dev: DRM device
+ * @data: data pointer for the ioctl
+ * @file: DRM file for the ioctl call
+ *
+ * Returns the current tiling mode and required bit 6 swizzling for the object.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int
+i915_gem_get_tiling_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file)
+{
+ struct drm_i915_gem_get_tiling *args = data;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_gem_object *obj;
+ int err = -ENOENT;
+
+ if (!to_gt(dev_priv)->ggtt->num_fences)
+ return -EOPNOTSUPP;
+
+ rcu_read_lock();
+ obj = i915_gem_object_lookup_rcu(file, args->handle);
+ if (obj) {
+ args->tiling_mode =
+ READ_ONCE(obj->tiling_and_stride) & TILING_MASK;
+ err = 0;
+ }
+ rcu_read_unlock();
+ if (unlikely(err))
+ return err;
+
+ switch (args->tiling_mode) {
+ case I915_TILING_X:
+ args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_x;
+ break;
+ case I915_TILING_Y:
+ args->swizzle_mode = to_gt(dev_priv)->ggtt->bit_6_swizzle_y;
+ break;
+ default:
+ case I915_TILING_NONE:
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
+ break;
+ }
+
+ /* Hide bit 17 from the user -- see comment in i915_gem_set_tiling */
+ if (dev_priv->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
+ args->phys_swizzle_mode = I915_BIT_6_SWIZZLE_UNKNOWN;
+ else
+ args->phys_swizzle_mode = args->swizzle_mode;
+ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_17)
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_9;
+ if (args->swizzle_mode == I915_BIT_6_SWIZZLE_9_10_17)
+ args->swizzle_mode = I915_BIT_6_SWIZZLE_9_10;
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_tiling.h b/drivers/gpu/drm/i915/gem/i915_gem_tiling.h
new file mode 100644
index 0000000000..6bd5751abf
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_tiling.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __I915_GEM_TILING_H__
+#define __I915_GEM_TILING_H__
+
+#include <linux/types.h>
+
+struct drm_i915_gem_object;
+struct drm_i915_private;
+
+bool i915_gem_object_needs_bit17_swizzle(struct drm_i915_gem_object *obj);
+u32 i915_gem_fence_size(struct drm_i915_private *i915, u32 size,
+ unsigned int tiling, unsigned int stride);
+u32 i915_gem_fence_alignment(struct drm_i915_private *i915, u32 size,
+ unsigned int tiling, unsigned int stride);
+
+#endif /* __I915_GEM_TILING_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
new file mode 100644
index 0000000000..9227f8146a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -0,0 +1,1384 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <linux/shmem_fs.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_buddy.h>
+
+#include "i915_drv.h"
+#include "i915_ttm_buddy_manager.h"
+#include "intel_memory_region.h"
+#include "intel_region_ttm.h"
+
+#include "gem/i915_gem_mman.h"
+#include "gem/i915_gem_object.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
+#include "gem/i915_gem_ttm_pm.h"
+#include "gt/intel_gpu_commands.h"
+
+#define I915_TTM_PRIO_PURGE 0
+#define I915_TTM_PRIO_NO_PAGES 1
+#define I915_TTM_PRIO_HAS_PAGES 2
+#define I915_TTM_PRIO_NEEDS_CPU_ACCESS 3
+
+/*
+ * Size of struct ttm_place vector in on-stack struct ttm_placement allocs
+ */
+#define I915_TTM_MAX_PLACEMENTS INTEL_REGION_UNKNOWN
+
+/**
+ * struct i915_ttm_tt - TTM page vector with additional private information
+ * @ttm: The base TTM page vector.
+ * @dev: The struct device used for dma mapping and unmapping.
+ * @cached_rsgt: The cached scatter-gather table.
+ * @is_shmem: Set if using shmem.
+ * @filp: The shmem file, if using shmem backend.
+ *
+ * Note that DMA may be going on right up to the point where the page-
+ * vector is unpopulated in delayed destroy. Hence keep the
+ * scatter-gather table mapped and cached up to that point. This is
+ * different from the cached gem object io scatter-gather table which
+ * doesn't have an associated dma mapping.
+ */
+struct i915_ttm_tt {
+ struct ttm_tt ttm;
+ struct device *dev;
+ struct i915_refct_sgt cached_rsgt;
+
+ bool is_shmem;
+ struct file *filp;
+};
+
+static const struct ttm_place sys_placement_flags = {
+ .fpfn = 0,
+ .lpfn = 0,
+ .mem_type = I915_PL_SYSTEM,
+ .flags = 0,
+};
+
+static struct ttm_placement i915_sys_placement = {
+ .num_placement = 1,
+ .placement = &sys_placement_flags,
+ .num_busy_placement = 1,
+ .busy_placement = &sys_placement_flags,
+};
+
+/**
+ * i915_ttm_sys_placement - Return the struct ttm_placement to be
+ * used for an object in system memory.
+ *
+ * Rather than making the struct extern, use this
+ * function.
+ *
+ * Return: A pointer to a static variable for sys placement.
+ */
+struct ttm_placement *i915_ttm_sys_placement(void)
+{
+ return &i915_sys_placement;
+}
+
+static int i915_ttm_err_to_gem(int err)
+{
+ /* Fastpath */
+ if (likely(!err))
+ return 0;
+
+ switch (err) {
+ case -EBUSY:
+ /*
+ * TTM likes to convert -EDEADLK to -EBUSY, and wants us to
+ * restart the operation, since we don't record the contending
+ * lock. We use -EAGAIN to restart.
+ */
+ return -EAGAIN;
+ case -ENOSPC:
+ /*
+ * Memory type / region is full, and we can't evict.
+		 * Except possibly system, which returns -ENOMEM instead.
+ */
+ return -ENXIO;
+ default:
+ break;
+ }
+
+ return err;
+}
+
+static enum ttm_caching
+i915_ttm_select_tt_caching(const struct drm_i915_gem_object *obj)
+{
+ /*
+	 * Objects that are only allowed in system memory get cached CPU mappings,
+	 * as do lmem-only buffers while they are being evicted to system for
+	 * swapping. Other objects get a WC mapping for now, even when in system.
+ */
+ if (obj->mm.n_placements <= 1)
+ return ttm_cached;
+
+ return ttm_write_combined;
+}
+
+static void
+i915_ttm_place_from_region(const struct intel_memory_region *mr,
+ struct ttm_place *place,
+ resource_size_t offset,
+ resource_size_t size,
+ unsigned int flags)
+{
+ memset(place, 0, sizeof(*place));
+ place->mem_type = intel_region_to_ttm_type(mr);
+
+ if (mr->type == INTEL_MEMORY_SYSTEM)
+ return;
+
+ if (flags & I915_BO_ALLOC_CONTIGUOUS)
+ place->flags |= TTM_PL_FLAG_CONTIGUOUS;
+ if (offset != I915_BO_INVALID_OFFSET) {
+ WARN_ON(overflows_type(offset >> PAGE_SHIFT, place->fpfn));
+ place->fpfn = offset >> PAGE_SHIFT;
+ WARN_ON(overflows_type(place->fpfn + (size >> PAGE_SHIFT), place->lpfn));
+ place->lpfn = place->fpfn + (size >> PAGE_SHIFT);
+ } else if (mr->io_size && mr->io_size < mr->total) {
+ if (flags & I915_BO_ALLOC_GPU_ONLY) {
+ place->flags |= TTM_PL_FLAG_TOPDOWN;
+ } else {
+ place->fpfn = 0;
+ WARN_ON(overflows_type(mr->io_size >> PAGE_SHIFT, place->lpfn));
+ place->lpfn = mr->io_size >> PAGE_SHIFT;
+ }
+ }
+}
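+
+/*
+ * Note on i915_ttm_place_from_region() above: on small-BAR parts, where
+ * mr->io_size < mr->total, an object without I915_BO_ALLOC_GPU_ONLY is
+ * constrained to the CPU-visible head of the region via fpfn/lpfn, while
+ * GPU-only objects are instead steered away from it with
+ * TTM_PL_FLAG_TOPDOWN.
+ */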
+
+static void
+i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj,
+ struct ttm_place *requested,
+ struct ttm_place *busy,
+ struct ttm_placement *placement)
+{
+ unsigned int num_allowed = obj->mm.n_placements;
+ unsigned int flags = obj->flags;
+ unsigned int i;
+
+ placement->num_placement = 1;
+ i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] :
+ obj->mm.region, requested, obj->bo_offset,
+ obj->base.size, flags);
+
+ /* Cache this on object? */
+ placement->num_busy_placement = num_allowed;
+ for (i = 0; i < placement->num_busy_placement; ++i)
+ i915_ttm_place_from_region(obj->mm.placements[i], busy + i,
+ obj->bo_offset, obj->base.size, flags);
+
+ if (num_allowed == 0) {
+ *busy = *requested;
+ placement->num_busy_placement = 1;
+ }
+
+ placement->placement = requested;
+ placement->busy_placement = busy;
+}
+
+static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+ struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
+ const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
+ struct file *filp = i915_tt->filp;
+ struct sgt_iter sgt_iter;
+ struct sg_table *st;
+ struct page *page;
+ unsigned long i;
+ int err;
+
+ if (!filp) {
+ struct address_space *mapping;
+ gfp_t mask;
+
+ filp = shmem_file_setup("i915-shmem-tt", size, VM_NORESERVE);
+ if (IS_ERR(filp))
+ return PTR_ERR(filp);
+
+ mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
+
+ mapping = filp->f_mapping;
+ mapping_set_gfp_mask(mapping, mask);
+ GEM_BUG_ON(!(mapping_gfp_mask(mapping) & __GFP_RECLAIM));
+
+ i915_tt->filp = filp;
+ }
+
+ st = &i915_tt->cached_rsgt.table;
+ err = shmem_sg_alloc_table(i915, st, size, mr, filp->f_mapping,
+ max_segment);
+ if (err)
+ return err;
+
+ err = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (err)
+ goto err_free_st;
+
+ i = 0;
+ for_each_sgt_page(page, sgt_iter, st)
+ ttm->pages[i++] = page;
+
+ if (ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+ ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+
+ return 0;
+
+err_free_st:
+ shmem_sg_free_table(st, filp->f_mapping, false, false);
+
+ return err;
+}
+
+static void i915_ttm_tt_shmem_unpopulate(struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ bool backup = ttm->page_flags & TTM_TT_FLAG_SWAPPED;
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+ shmem_sg_free_table(st, file_inode(i915_tt->filp)->i_mapping,
+ backup, backup);
+}
+
+static void i915_ttm_tt_release(struct kref *ref)
+{
+ struct i915_ttm_tt *i915_tt =
+ container_of(ref, typeof(*i915_tt), cached_rsgt.kref);
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+ GEM_WARN_ON(st->sgl);
+
+ kfree(i915_tt);
+}
+
+static const struct i915_refct_sgt_ops tt_rsgt_ops = {
+ .release = i915_ttm_tt_release
+};
+
+static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
+ uint32_t page_flags)
+{
+ struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+ bdev);
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ unsigned long ccs_pages = 0;
+ enum ttm_caching caching;
+ struct i915_ttm_tt *i915_tt;
+ int ret;
+
+ if (i915_ttm_is_ghost_object(bo))
+ return NULL;
+
+ i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
+ if (!i915_tt)
+ return NULL;
+
+ if (obj->flags & I915_BO_ALLOC_CPU_CLEAR && (!bo->resource ||
+ ttm_manager_type(bo->bdev, bo->resource->mem_type)->use_tt))
+ page_flags |= TTM_TT_FLAG_ZERO_ALLOC;
+
+ caching = i915_ttm_select_tt_caching(obj);
+ if (i915_gem_object_is_shrinkable(obj) && caching == ttm_cached) {
+ page_flags |= TTM_TT_FLAG_EXTERNAL |
+ TTM_TT_FLAG_EXTERNAL_MAPPABLE;
+ i915_tt->is_shmem = true;
+ }
+
+ if (i915_gem_object_needs_ccs_pages(obj))
+ ccs_pages = DIV_ROUND_UP(DIV_ROUND_UP(bo->base.size,
+ NUM_BYTES_PER_CCS_BYTE),
+ PAGE_SIZE);
+
+ ret = ttm_tt_init(&i915_tt->ttm, bo, page_flags, caching, ccs_pages);
+ if (ret)
+ goto err_free;
+
+ __i915_refct_sgt_init(&i915_tt->cached_rsgt, bo->base.size,
+ &tt_rsgt_ops);
+
+ i915_tt->dev = obj->base.dev->dev;
+
+ return &i915_tt->ttm;
+
+err_free:
+ kfree(i915_tt);
+ return NULL;
+}
+
+static int i915_ttm_tt_populate(struct ttm_device *bdev,
+ struct ttm_tt *ttm,
+ struct ttm_operation_ctx *ctx)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+
+ if (i915_tt->is_shmem)
+ return i915_ttm_tt_shmem_populate(bdev, ttm, ctx);
+
+ return ttm_pool_alloc(&bdev->pool, ttm, ctx);
+}
+
+static void i915_ttm_tt_unpopulate(struct ttm_device *bdev, struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ struct sg_table *st = &i915_tt->cached_rsgt.table;
+
+ if (st->sgl)
+ dma_unmap_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
+
+ if (i915_tt->is_shmem) {
+ i915_ttm_tt_shmem_unpopulate(ttm);
+ } else {
+ sg_free_table(st);
+ ttm_pool_free(&bdev->pool, ttm);
+ }
+}
+
+static void i915_ttm_tt_destroy(struct ttm_device *bdev, struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+
+ if (i915_tt->filp)
+ fput(i915_tt->filp);
+
+ ttm_tt_fini(ttm);
+ i915_refct_sgt_put(&i915_tt->cached_rsgt);
+}
+
+static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
+ const struct ttm_place *place)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+
+ if (i915_ttm_is_ghost_object(bo))
+ return false;
+
+ /*
+ * EXTERNAL objects should never be swapped out by TTM, instead we need
+ * to handle that ourselves. TTM will already skip such objects for us,
+ * but we would like to avoid grabbing locks for no good reason.
+ */
+ if (bo->ttm && bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL)
+ return false;
+
+ /* Will do for now. Our pinned objects are still on TTM's LRU lists */
+ if (!i915_gem_object_evictable(obj))
+ return false;
+
+ return ttm_bo_eviction_valuable(bo, place);
+}
+
+static void i915_ttm_evict_flags(struct ttm_buffer_object *bo,
+ struct ttm_placement *placement)
+{
+ *placement = i915_sys_placement;
+}
+
+/**
+ * i915_ttm_free_cached_io_rsgt - Free object cached LMEM information
+ * @obj: The GEM object
+ *
+ * This function frees any LMEM-related information that is cached on
+ * the object, for example the radix tree for fast page lookup and the
+ * cached refcounted sg-table.
+ */
+void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj)
+{
+ struct radix_tree_iter iter;
+ void __rcu **slot;
+
+ if (!obj->ttm.cached_io_rsgt)
+ return;
+
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &obj->ttm.get_io_page.radix, &iter, 0)
+ radix_tree_delete(&obj->ttm.get_io_page.radix, iter.index);
+ rcu_read_unlock();
+
+ i915_refct_sgt_put(obj->ttm.cached_io_rsgt);
+ obj->ttm.cached_io_rsgt = NULL;
+}
+
+/**
+ * i915_ttm_purge - Clear an object of its memory
+ * @obj: The object
+ *
+ * This function is called to clear an object of its memory when it is
+ * marked as not needed anymore.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int i915_ttm_purge(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ struct ttm_placement place = {};
+ int ret;
+
+ if (obj->mm.madv == __I915_MADV_PURGED)
+ return 0;
+
+ ret = ttm_bo_validate(bo, &place, &ctx);
+ if (ret)
+ return ret;
+
+ if (bo->ttm && i915_tt->filp) {
+ /*
+		 * The fput() below (which eventually calls shmem_truncate) might
+		 * be delayed by a worker, so when called directly to purge the
+		 * pages (like by the shrinker) we should try to be more
+		 * aggressive and release the pages immediately.
+ */
+ shmem_truncate_range(file_inode(i915_tt->filp),
+ 0, (loff_t)-1);
+ fput(fetch_and_zero(&i915_tt->filp));
+ }
+
+ obj->write_domain = 0;
+ obj->read_domains = 0;
+ i915_ttm_adjust_gem_after_move(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
+ obj->mm.madv = __I915_MADV_PURGED;
+
+ return 0;
+}
+
+static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = flags & I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT,
+ };
+ struct ttm_placement place = {};
+ int ret;
+
+ if (!bo->ttm || i915_ttm_cpu_maps_iomem(bo->resource))
+ return 0;
+
+ GEM_BUG_ON(!i915_tt->is_shmem);
+
+ if (!i915_tt->filp)
+ return 0;
+
+ ret = ttm_bo_wait_ctx(bo, &ctx);
+ if (ret)
+ return ret;
+
+ switch (obj->mm.madv) {
+ case I915_MADV_DONTNEED:
+ return i915_ttm_purge(obj);
+ case __I915_MADV_PURGED:
+ return 0;
+ }
+
+ if (bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED)
+ return 0;
+
+ bo->ttm->page_flags |= TTM_TT_FLAG_SWAPPED;
+ ret = ttm_bo_validate(bo, &place, &ctx);
+ if (ret) {
+ bo->ttm->page_flags &= ~TTM_TT_FLAG_SWAPPED;
+ return ret;
+ }
+
+ if (flags & I915_GEM_OBJECT_SHRINK_WRITEBACK)
+ __shmem_writeback(obj->base.size, i915_tt->filp->f_mapping);
+
+ return 0;
+}
+
+static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+
+ /*
+ * This gets called twice by ttm, so long as we have a ttm resource or
+ * ttm_tt then we can still safely call this. Due to pipeline-gutting,
+ * we may have a NULL bo->resource, but in that case we should always
+ * have a ttm alive (like if the pages are swapped out).
+ */
+ if ((bo->resource || bo->ttm) && !i915_ttm_is_ghost_object(bo)) {
+ __i915_gem_object_pages_fini(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
+ }
+}
+
+static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
+{
+ struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
+ struct sg_table *st;
+ int ret;
+
+ if (i915_tt->cached_rsgt.table.sgl)
+ return i915_refct_sgt_get(&i915_tt->cached_rsgt);
+
+ st = &i915_tt->cached_rsgt.table;
+ ret = sg_alloc_table_from_pages_segment(st,
+ ttm->pages, ttm->num_pages,
+ 0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
+ i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
+ if (ret) {
+ st->sgl = NULL;
+ return ERR_PTR(ret);
+ }
+
+ ret = dma_map_sgtable(i915_tt->dev, st, DMA_BIDIRECTIONAL, 0);
+ if (ret) {
+ sg_free_table(st);
+ return ERR_PTR(ret);
+ }
+
+ return i915_refct_sgt_get(&i915_tt->cached_rsgt);
+}
+
+/**
+ * i915_ttm_resource_get_st - Get a refcounted sg-table pointing to the
+ * resource memory
+ * @obj: The GEM object used for sg-table caching
+ * @res: The struct ttm_resource for which an sg-table is requested.
+ *
+ * This function returns a refcounted sg-table representing the memory
+ * pointed to by @res. If @res is the object's current resource it may also
+ * cache the sg_table on the object or attempt to access an already cached
+ * sg-table. The refcounted sg-table needs to be put when no longer in use.
+ *
+ * Return: A valid pointer to a struct i915_refct_sgt or error pointer on
+ * failure.
+ */
+struct i915_refct_sgt *
+i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
+ struct ttm_resource *res)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ u32 page_alignment;
+
+ if (!i915_ttm_gtt_binds_lmem(res))
+ return i915_ttm_tt_get_st(bo->ttm);
+
+ page_alignment = bo->page_alignment << PAGE_SHIFT;
+ if (!page_alignment)
+ page_alignment = obj->mm.region->min_page_size;
+
+ /*
+ * If CPU mapping differs, we need to add the ttm_tt pages to
+ * the resulting st. Might make sense for GGTT.
+ */
+ GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(res));
+ if (bo->resource == res) {
+ if (!obj->ttm.cached_io_rsgt) {
+ struct i915_refct_sgt *rsgt;
+
+ rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
+ res,
+ page_alignment);
+ if (IS_ERR(rsgt))
+ return rsgt;
+
+ obj->ttm.cached_io_rsgt = rsgt;
+ }
+ return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
+ }
+
+ return intel_region_ttm_resource_to_rsgt(obj->mm.region, res,
+ page_alignment);
+}
+
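+/*
+ * A minimal usage sketch (illustrative only, not used by this file): callers
+ * that need the backing store as an sg-table pair the get with a put:
+ *
+ *    struct i915_refct_sgt *rsgt =
+ *        i915_ttm_resource_get_st(obj, i915_gem_to_ttm(obj)->resource);
+ *
+ *    if (IS_ERR(rsgt))
+ *        return PTR_ERR(rsgt);
+ *    ... use rsgt->table ...
+ *    i915_refct_sgt_put(rsgt);
+ */
+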
+static int i915_ttm_truncate(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ long err;
+
+ WARN_ON_ONCE(obj->mm.madv == I915_MADV_WILLNEED);
+
+ err = dma_resv_wait_timeout(bo->base.resv, DMA_RESV_USAGE_BOOKKEEP,
+ true, 15 * HZ);
+ if (err < 0)
+ return err;
+ if (err == 0)
+ return -EBUSY;
+
+ err = i915_ttm_move_notify(bo);
+ if (err)
+ return err;
+
+ return i915_ttm_purge(obj);
+}
+
+static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ int ret;
+
+ if (i915_ttm_is_ghost_object(bo))
+ return;
+
+ ret = i915_ttm_move_notify(bo);
+ GEM_WARN_ON(ret);
+ GEM_WARN_ON(obj->ttm.cached_io_rsgt);
+ if (!ret && obj->mm.madv != I915_MADV_WILLNEED)
+ i915_ttm_purge(obj);
+}
+
+/**
+ * i915_ttm_resource_mappable - Return true if the ttm resource is CPU
+ * accessible.
+ * @res: The TTM resource to check.
+ *
+ * This is interesting on small-BAR systems where we may encounter lmem objects
+ * that can't be accessed via the CPU.
+ */
+bool i915_ttm_resource_mappable(struct ttm_resource *res)
+{
+ struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
+
+ if (!i915_ttm_cpu_maps_iomem(res))
+ return true;
+
+ return bman_res->used_visible_size == PFN_UP(bman_res->base.size);
+}
+
+static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource *mem)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo);
+ bool unknown_state;
+
+ if (i915_ttm_is_ghost_object(mem->bo))
+ return -EINVAL;
+
+ if (!kref_get_unless_zero(&obj->base.refcount))
+ return -EINVAL;
+
+ assert_object_held(obj);
+
+ unknown_state = i915_gem_object_has_unknown_state(obj);
+ i915_gem_object_put(obj);
+ if (unknown_state)
+ return -EINVAL;
+
+ if (!i915_ttm_cpu_maps_iomem(mem))
+ return 0;
+
+ if (!i915_ttm_resource_mappable(mem))
+ return -EINVAL;
+
+ mem->bus.caching = ttm_write_combined;
+ mem->bus.is_iomem = true;
+
+ return 0;
+}
+
+static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+ unsigned long page_offset)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct scatterlist *sg;
+ unsigned long base;
+ unsigned int ofs;
+
+ GEM_BUG_ON(i915_ttm_is_ghost_object(bo));
+ GEM_WARN_ON(bo->ttm);
+
+ base = obj->mm.region->iomap.base - obj->mm.region->region.start;
+ sg = i915_gem_object_page_iter_get_sg(obj, &obj->ttm.get_io_page, page_offset, &ofs);
+
+ return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
+}
+
+static int i915_ttm_access_memory(struct ttm_buffer_object *bo,
+ unsigned long offset, void *buf,
+ int len, int write)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ resource_size_t iomap = obj->mm.region->iomap.base -
+ obj->mm.region->region.start;
+ unsigned long page = offset >> PAGE_SHIFT;
+ unsigned long bytes_left = len;
+
+ /*
+ * TODO: For now just let it fail if the resource is non-mappable,
+ * otherwise we need to perform the memcpy from the gpu here, without
+ * interfering with the object (like moving the entire thing).
+ */
+ if (!i915_ttm_resource_mappable(bo->resource))
+ return -EIO;
+
+ offset -= page << PAGE_SHIFT;
+ do {
+ unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
+ void __iomem *ptr;
+ dma_addr_t daddr;
+
+ daddr = i915_gem_object_get_dma_address(obj, page);
+ ptr = ioremap_wc(iomap + daddr + offset, bytes);
+ if (!ptr)
+ return -EIO;
+
+ if (write)
+ memcpy_toio(ptr, buf, bytes);
+ else
+ memcpy_fromio(buf, ptr, bytes);
+ iounmap(ptr);
+
+ page++;
+ buf += bytes;
+ bytes_left -= bytes;
+ offset = 0;
+ } while (bytes_left);
+
+ return len;
+}
+
+/*
+ * All callbacks need to take care not to downcast a struct ttm_buffer_object
+ * without checking its subclass, since it might be a TTM ghost object.
+ */
+static struct ttm_device_funcs i915_ttm_bo_driver = {
+ .ttm_tt_create = i915_ttm_tt_create,
+ .ttm_tt_populate = i915_ttm_tt_populate,
+ .ttm_tt_unpopulate = i915_ttm_tt_unpopulate,
+ .ttm_tt_destroy = i915_ttm_tt_destroy,
+ .eviction_valuable = i915_ttm_eviction_valuable,
+ .evict_flags = i915_ttm_evict_flags,
+ .move = i915_ttm_move,
+ .swap_notify = i915_ttm_swap_notify,
+ .delete_mem_notify = i915_ttm_delete_mem_notify,
+ .io_mem_reserve = i915_ttm_io_mem_reserve,
+ .io_mem_pfn = i915_ttm_io_mem_pfn,
+ .access_memory = i915_ttm_access_memory,
+};
+
+/**
+ * i915_ttm_driver - Return a pointer to the TTM device funcs
+ *
+ * Return: Pointer to statically allocated TTM device funcs.
+ */
+struct ttm_device_funcs *i915_ttm_driver(void)
+{
+ return &i915_ttm_bo_driver;
+}
+
+static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj,
+ struct ttm_placement *placement)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ int real_num_busy;
+ int ret;
+
+ /* First try only the requested placement. No eviction. */
+ real_num_busy = fetch_and_zero(&placement->num_busy_placement);
+ ret = ttm_bo_validate(bo, placement, &ctx);
+ if (ret) {
+ ret = i915_ttm_err_to_gem(ret);
+ /*
+ * Anything that wants to restart the operation gets to
+ * do that.
+ */
+ if (ret == -EDEADLK || ret == -EINTR || ret == -ERESTARTSYS ||
+ ret == -EAGAIN)
+ return ret;
+
+ /*
+ * If the initial attempt fails, allow all accepted placements,
+ * evicting if necessary.
+ */
+ placement->num_busy_placement = real_num_busy;
+ ret = ttm_bo_validate(bo, placement, &ctx);
+ if (ret)
+ return i915_ttm_err_to_gem(ret);
+ }
+
+ if (bo->ttm && !ttm_tt_is_populated(bo->ttm)) {
+ ret = ttm_tt_populate(bo->bdev, bo->ttm, &ctx);
+ if (ret)
+ return ret;
+
+ i915_ttm_adjust_domains_after_move(obj);
+ i915_ttm_adjust_gem_after_move(obj);
+ }
+
+ if (!i915_gem_object_has_pages(obj)) {
+ struct i915_refct_sgt *rsgt =
+ i915_ttm_resource_get_st(obj, bo->resource);
+
+ if (IS_ERR(rsgt))
+ return PTR_ERR(rsgt);
+
+ GEM_BUG_ON(obj->mm.rsgt);
+ obj->mm.rsgt = rsgt;
+ __i915_gem_object_set_pages(obj, &rsgt->table);
+ }
+
+ GEM_BUG_ON(bo->ttm && ((obj->base.size >> PAGE_SHIFT) < bo->ttm->num_pages));
+ i915_ttm_adjust_lru(obj);
+ return ret;
+}
+
+static int i915_ttm_get_pages(struct drm_i915_gem_object *obj)
+{
+ struct ttm_place requested, busy[I915_TTM_MAX_PLACEMENTS];
+ struct ttm_placement placement;
+
+ /* restricted by sg_alloc_table */
+ if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int))
+ return -E2BIG;
+
+ GEM_BUG_ON(obj->mm.n_placements > I915_TTM_MAX_PLACEMENTS);
+
+ /* Move to the requested placement. */
+ i915_ttm_placement_from_obj(obj, &requested, busy, &placement);
+
+ return __i915_ttm_get_pages(obj, &placement);
+}
+
+/**
+ * DOC: Migration vs eviction
+ *
+ * GEM migration may not be the same as TTM migration / eviction. If
+ * the TTM core decides to evict an object it may be evicted to a
+ * TTM memory type that is not in the object's allowable GEM regions, or
+ * in fact theoretically to a TTM memory type that doesn't correspond to
+ * a GEM memory region. In that case the object's GEM region is not
+ * updated, and the data is migrated back to the GEM region at
+ * get_pages time. TTM may however set up CPU ptes to the object even
+ * when it is evicted.
+ * GEM forced migration, using the i915_ttm_migrate() op, is allowed even
+ * to regions that are not in the object's list of allowable placements.
+ */
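+/*
+ * A hypothetical forced-migration sketch (illustrative only): with the object
+ * lock held, an internal caller may move an object to @mr through the migrate
+ * op below, even if @mr is not among the object's allowable placements:
+ *
+ *    assert_object_held(obj);
+ *    err = __i915_ttm_migrate(obj, mr, obj->flags);
+ *    if (err)
+ *        return err;
+ */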
+static int __i915_ttm_migrate(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mr,
+ unsigned int flags)
+{
+ struct ttm_place requested;
+ struct ttm_placement placement;
+ int ret;
+
+ i915_ttm_place_from_region(mr, &requested, obj->bo_offset,
+ obj->base.size, flags);
+ placement.num_placement = 1;
+ placement.num_busy_placement = 1;
+ placement.placement = &requested;
+ placement.busy_placement = &requested;
+
+ ret = __i915_ttm_get_pages(obj, &placement);
+ if (ret)
+ return ret;
+
+ /*
+ * Reinitialize the region bindings. This is primarily
+ * required for objects where the new region is not in
+ * its allowable placements.
+ */
+ if (obj->mm.region != mr) {
+ i915_gem_object_release_memory_region(obj);
+ i915_gem_object_init_memory_region(obj, mr);
+ }
+
+ return 0;
+}
+
+static int i915_ttm_migrate(struct drm_i915_gem_object *obj,
+ struct intel_memory_region *mr,
+ unsigned int flags)
+{
+ return __i915_ttm_migrate(obj, mr, flags);
+}
+
+static void i915_ttm_put_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *st)
+{
+ /*
+ * We're currently not called from a shrinker, so put_pages()
+ * typically means the object is about to be destroyed, or called
+ * from move_notify(). So just avoid doing much for now.
+ * If the object is not destroyed next, the TTM eviction logic
+ * and shrinkers will move it out if needed.
+ */
+
+ if (obj->mm.rsgt)
+ i915_refct_sgt_put(fetch_and_zero(&obj->mm.rsgt));
+}
+
+/**
+ * i915_ttm_adjust_lru - Adjust an object's position on relevant LRU lists.
+ * @obj: The object
+ */
+void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct i915_ttm_tt *i915_tt =
+ container_of(bo->ttm, typeof(*i915_tt), ttm);
+ bool shrinkable =
+ bo->ttm && i915_tt->filp && ttm_tt_is_populated(bo->ttm);
+
+ /*
+ * Don't manipulate the TTM LRUs while in TTM bo destruction.
+ * We're called through i915_ttm_delete_mem_notify().
+ */
+ if (!kref_read(&bo->kref))
+ return;
+
+ /*
+ * We skip managing the shrinker LRU in set_pages() and just manage
+ * everything here. This does at least solve the issue with having
+ * temporary shmem mappings (like with evicted lmem) not being visible to
+ * the shrinker. Only our shmem objects are shrinkable, everything else
+ * we keep as unshrinkable.
+ *
+ * To make sure everything plays nice we keep an extra shrink pin in TTM
+ * if the underlying pages are not currently shrinkable. Once we release
+ * our pin, like when the pages are moved to shmem, the pages will then
+ * be added to the shrinker LRU, assuming the caller isn't also holding
+ * a pin.
+ *
+ * TODO: consider maybe also bumping the shrinker list here when we have
+ * already unpinned it, which should give us something more like an LRU.
+ *
+ * TODO: There is a small window of opportunity for this function to
+ * get called from eviction after we've dropped the last GEM refcount,
+ * but before the TTM deleted flag is set on the object. Avoid
+ * adjusting the shrinker list in such cases, since the object is
+ * not available to the shrinker anyway due to its zero refcount.
+ * To fix this properly we should move to a TTM shrinker LRU list for
+ * these objects.
+ */
+ if (kref_get_unless_zero(&obj->base.refcount)) {
+ if (shrinkable != obj->mm.ttm_shrinkable) {
+ if (shrinkable) {
+ if (obj->mm.madv == I915_MADV_WILLNEED)
+ __i915_gem_object_make_shrinkable(obj);
+ else
+ __i915_gem_object_make_purgeable(obj);
+ } else {
+ i915_gem_object_make_unshrinkable(obj);
+ }
+
+ obj->mm.ttm_shrinkable = shrinkable;
+ }
+ i915_gem_object_put(obj);
+ }
+
+ /*
+ * Put on the correct LRU list depending on the MADV status
+ */
+ spin_lock(&bo->bdev->lru_lock);
+ if (shrinkable) {
+ /* Try to keep shmem_tt from being considered for shrinking. */
+ bo->priority = TTM_MAX_BO_PRIORITY - 1;
+ } else if (obj->mm.madv != I915_MADV_WILLNEED) {
+ bo->priority = I915_TTM_PRIO_PURGE;
+ } else if (!i915_gem_object_has_pages(obj)) {
+ bo->priority = I915_TTM_PRIO_NO_PAGES;
+ } else {
+ struct ttm_resource_manager *man =
+ ttm_manager_type(bo->bdev, bo->resource->mem_type);
+
+ /*
+ * If we need to place an LMEM resource which doesn't need CPU
+ * access then we should try not to victimize mappable objects
+ * first, since we likely end up stealing more of the mappable
+ * portion. And likewise when we try to find space for a mappable
+ * object, we know not to ever victimize objects that don't
+ * occupy any mappable pages.
+ */
+ if (i915_ttm_cpu_maps_iomem(bo->resource) &&
+ i915_ttm_buddy_man_visible_size(man) < man->size &&
+ !(obj->flags & I915_BO_ALLOC_GPU_ONLY))
+ bo->priority = I915_TTM_PRIO_NEEDS_CPU_ACCESS;
+ else
+ bo->priority = I915_TTM_PRIO_HAS_PAGES;
+ }
+
+ ttm_bo_move_to_lru_tail(bo);
+ spin_unlock(&bo->bdev->lru_lock);
+}
+
+/*
+ * TTM-backed gem object destruction requires some clarification.
+ * Basically we have two possibilities here. We can either rely on the
+ * i915 delayed destruction and put the TTM object when the object
+ * is idle. This would be detected by TTM which would bypass the
+ * TTM delayed destroy handling. The other approach is to put the TTM
+ * object early and rely on the TTM destroyed handling, and then free
+ * the leftover parts of the GEM object once TTM's destroyed list handling is
+ * complete. For now, we rely on the latter for two reasons:
+ * a) TTM can evict an object even when it's on the delayed destroy list,
+ * which in theory allows for complete eviction.
+ * b) There is work going on in TTM to allow freeing an object even when
+ * it's not idle, and using the TTM destroyed list handling could help us
+ * benefit from that.
+ */
+static void i915_ttm_delayed_free(struct drm_i915_gem_object *obj)
+{
+ GEM_BUG_ON(!obj->ttm.created);
+
+ ttm_bo_put(i915_gem_to_ttm(obj));
+}
+
+static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
+{
+ struct vm_area_struct *area = vmf->vma;
+ struct ttm_buffer_object *bo = area->vm_private_data;
+ struct drm_device *dev = bo->base.dev;
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ intel_wakeref_t wakeref = 0;
+ vm_fault_t ret;
+ int idx;
+
+ /* Sanity check that we allow writing into this object */
+ if (unlikely(i915_gem_object_is_readonly(obj) &&
+ area->vm_flags & VM_WRITE))
+ return VM_FAULT_SIGBUS;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ dma_resv_unlock(bo->base.resv);
+ return VM_FAULT_SIGBUS;
+ }
+
+ /*
+ * This must be swapped out with shmem ttm_tt (pipeline-gutting).
+ * Calling ttm_bo_validate() here with TTM_PL_SYSTEM should only go as
+ * far as doing a ttm_bo_move_null(), which should skip all the
+ * other junk.
+ */
+ if (!bo->resource) {
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = true, /* should be idle already */
+ };
+ int err;
+
+ GEM_BUG_ON(!bo->ttm || !(bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED));
+
+ err = ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
+ if (err) {
+ dma_resv_unlock(bo->base.resv);
+ return VM_FAULT_SIGBUS;
+ }
+ } else if (!i915_ttm_resource_mappable(bo->resource)) {
+ int err = -ENODEV;
+ int i;
+
+ for (i = 0; i < obj->mm.n_placements; i++) {
+ struct intel_memory_region *mr = obj->mm.placements[i];
+ unsigned int flags;
+
+ if (!mr->io_size && mr->type != INTEL_MEMORY_SYSTEM)
+ continue;
+
+ flags = obj->flags;
+ flags &= ~I915_BO_ALLOC_GPU_ONLY;
+ err = __i915_ttm_migrate(obj, mr, flags);
+ if (!err)
+ break;
+ }
+
+ if (err) {
+ drm_dbg(dev, "Unable to make resource CPU accessible(err = %pe)\n",
+ ERR_PTR(err));
+ dma_resv_unlock(bo->base.resv);
+ ret = VM_FAULT_SIGBUS;
+ goto out_rpm;
+ }
+ }
+
+ if (i915_ttm_cpu_maps_iomem(bo->resource))
+ wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
+
+ if (drm_dev_enter(dev, &idx)) {
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+ drm_dev_exit(idx);
+ } else {
+ ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ }
+
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ goto out_rpm;
+
+ /*
+ * ttm_bo_vm_reserve() already has dma_resv_lock.
+ * userfault_count is protected by dma_resv lock and rpm wakeref.
+ */
+ if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
+ obj->userfault_count = 1;
+ spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+ list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list);
+ spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+
+ GEM_WARN_ON(!i915_ttm_cpu_maps_iomem(bo->resource));
+ }
+
+ if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
+ intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref,
+ msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
+
+ i915_ttm_adjust_lru(obj);
+
+ dma_resv_unlock(bo->base.resv);
+
+out_rpm:
+ if (wakeref)
+ intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
+
+ return ret;
+}
+
+static int
+vm_access_ttm(struct vm_area_struct *area, unsigned long addr,
+ void *buf, int len, int write)
+{
+ struct drm_i915_gem_object *obj =
+ i915_ttm_to_gem(area->vm_private_data);
+
+ if (i915_gem_object_is_readonly(obj) && write)
+ return -EACCES;
+
+ return ttm_bo_vm_access(area, addr, buf, len, write);
+}
+
+static void ttm_vm_open(struct vm_area_struct *vma)
+{
+ struct drm_i915_gem_object *obj =
+ i915_ttm_to_gem(vma->vm_private_data);
+
+ GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
+ i915_gem_object_get(obj);
+}
+
+static void ttm_vm_close(struct vm_area_struct *vma)
+{
+ struct drm_i915_gem_object *obj =
+ i915_ttm_to_gem(vma->vm_private_data);
+
+ GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
+ i915_gem_object_put(obj);
+}
+
+static const struct vm_operations_struct vm_ops_ttm = {
+ .fault = vm_fault_ttm,
+ .access = vm_access_ttm,
+ .open = ttm_vm_open,
+ .close = ttm_vm_close,
+};
+
+static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
+{
+ /* The ttm_bo must be allocated with I915_BO_ALLOC_USER */
+ GEM_BUG_ON(!drm_mm_node_allocated(&obj->base.vma_node.vm_node));
+
+ return drm_vma_node_offset_addr(&obj->base.vma_node);
+}
+
+static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ intel_wakeref_t wakeref = 0;
+
+ assert_object_held_shared(obj);
+
+ if (i915_ttm_cpu_maps_iomem(bo->resource)) {
+ wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
+
+ /* userfault_count is protected by obj lock and rpm wakeref. */
+ if (obj->userfault_count) {
+ spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+ list_del(&obj->userfault_link);
+ spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+ obj->userfault_count = 0;
+ }
+ }
+
+ GEM_WARN_ON(obj->userfault_count);
+
+ ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
+
+ if (wakeref)
+ intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
+ .name = "i915_gem_object_ttm",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE |
+ I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST,
+
+ .get_pages = i915_ttm_get_pages,
+ .put_pages = i915_ttm_put_pages,
+ .truncate = i915_ttm_truncate,
+ .shrink = i915_ttm_shrink,
+
+ .adjust_lru = i915_ttm_adjust_lru,
+ .delayed_free = i915_ttm_delayed_free,
+ .migrate = i915_ttm_migrate,
+
+ .mmap_offset = i915_ttm_mmap_offset,
+ .unmap_virtual = i915_ttm_unmap_virtual,
+ .mmap_ops = &vm_ops_ttm,
+};
+
+void i915_ttm_bo_destroy(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+
+ i915_gem_object_release_memory_region(obj);
+ mutex_destroy(&obj->ttm.get_io_page.lock);
+
+ if (obj->ttm.created) {
+ /*
+ * We freely manage the shrinker LRU outside of the mm.pages life
+ * cycle. As a result when destroying the object we should be
+ * extra paranoid and ensure we remove it from the LRU, before
+ * we free the object.
+ *
+ * Touching the ttm_shrinkable outside of the object lock here
+ * should be safe now that the last GEM object ref was dropped.
+ */
+ if (obj->mm.ttm_shrinkable)
+ i915_gem_object_make_unshrinkable(obj);
+
+ i915_ttm_backup_free(obj);
+
+ /* This releases all gem object bindings to the backend. */
+ __i915_gem_free_object(obj);
+
+ call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
+ } else {
+ __i915_gem_object_fini(obj);
+ }
+}
+
+/**
+ * __i915_gem_ttm_object_init - Initialize a ttm-backed i915 gem object
+ * @mem: The initial memory region for the object.
+ * @obj: The gem object.
+ * @offset: Offset of the object within the memory region, where applicable.
+ * @size: Object size in bytes.
+ * @page_size: Forced page size for the backing store, or 0.
+ * @flags: gem object flags.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
+ struct drm_i915_gem_object *obj,
+ resource_size_t offset,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags)
+{
+ static struct lock_class_key lock_class;
+ struct drm_i915_private *i915 = mem->i915;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ };
+ enum ttm_bo_type bo_type;
+ int ret;
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+ i915_gem_object_init(obj, &i915_gem_ttm_obj_ops, &lock_class, flags);
+
+ obj->bo_offset = offset;
+
+ /* Don't put on a region list until we're either locked or fully initialized. */
+ obj->mm.region = mem;
+ INIT_LIST_HEAD(&obj->mm.region_link);
+
+ INIT_RADIX_TREE(&obj->ttm.get_io_page.radix, GFP_KERNEL | __GFP_NOWARN);
+ mutex_init(&obj->ttm.get_io_page.lock);
+ bo_type = (obj->flags & I915_BO_ALLOC_USER) ? ttm_bo_type_device :
+ ttm_bo_type_kernel;
+
+ obj->base.vma_node.driver_private = i915_gem_to_ttm(obj);
+
+ /* Forcing the page size is kernel internal only */
+ GEM_BUG_ON(page_size && obj->mm.n_placements);
+
+ /*
+ * Keep an extra shrink pin to prevent the object from being made
+ * shrinkable too early. If the ttm_tt is ever allocated in shmem, we
+ * drop the pin. The TTM backend manages the shrinker LRU itself,
+ * outside of the normal mm.pages life cycle.
+ */
+ i915_gem_object_make_unshrinkable(obj);
+
+ /*
+ * If this function fails, it will call the destructor, but
+ * our caller still owns the object. So no freeing in the
+ * destructor until obj->ttm.created is true.
+ * Similarly, in delayed_destroy, we can't call ttm_bo_put()
+ * until successful initialization.
+ */
+ ret = ttm_bo_init_reserved(&i915->bdev, i915_gem_to_ttm(obj), bo_type,
+ &i915_sys_placement, page_size >> PAGE_SHIFT,
+ &ctx, NULL, NULL, i915_ttm_bo_destroy);
+
+ /*
+ * XXX: The ttm_bo_init_reserved() function returns -ENOSPC if the size
+ * is too big to add a vma. The function that directly returns -ENOSPC is
+ * drm_mm_insert_node_in_range(). To report the same error as other code
+ * that returns -E2BIG when the size is too large, convert -ENOSPC to
+ * -E2BIG here.
+ */
+ if (size >> PAGE_SHIFT > INT_MAX && ret == -ENOSPC)
+ ret = -E2BIG;
+
+ if (ret)
+ return i915_ttm_err_to_gem(ret);
+
+ obj->ttm.created = true;
+ i915_gem_object_release_memory_region(obj);
+ i915_gem_object_init_memory_region(obj, mem);
+ i915_ttm_adjust_domains_after_move(obj);
+ i915_ttm_adjust_gem_after_move(obj);
+ i915_gem_object_unlock(obj);
+
+ return 0;
+}
+
+static const struct intel_memory_region_ops ttm_system_region_ops = {
+ .init_object = __i915_gem_ttm_object_init,
+ .release = intel_region_ttm_fini,
+};
+
+struct intel_memory_region *
+i915_gem_ttm_system_setup(struct drm_i915_private *i915,
+ u16 type, u16 instance)
+{
+ struct intel_memory_region *mr;
+
+ mr = intel_memory_region_create(i915, 0,
+ totalram_pages() << PAGE_SHIFT,
+ PAGE_SIZE, 0, 0,
+ type, instance,
+ &ttm_system_region_ops);
+ if (IS_ERR(mr))
+ return mr;
+
+ intel_memory_region_set_name(mr, "system-ttm");
+ return mr;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
new file mode 100644
index 0000000000..67347e62e2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _I915_GEM_TTM_H_
+#define _I915_GEM_TTM_H_
+
+#include <drm/ttm/ttm_placement.h>
+
+#include "gem/i915_gem_object_types.h"
+
+/**
+ * i915_gem_to_ttm - Convert a struct drm_i915_gem_object to a
+ * struct ttm_buffer_object.
+ * @obj: Pointer to the gem object.
+ *
+ * Return: Pointer to the embedded struct ttm_buffer_object.
+ */
+static inline struct ttm_buffer_object *
+i915_gem_to_ttm(struct drm_i915_gem_object *obj)
+{
+ return &obj->__do_not_access;
+}
+
+/*
+ * i915 ttm gem object destructor. Internal use only.
+ */
+void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
+
+/**
+ * i915_ttm_is_ghost_object - Check if the ttm bo is a ghost object.
+ * @bo: Pointer to the ttm buffer object
+ *
+ * Return: True if the ttm bo is not an i915 object but a ghost ttm object,
+ * False otherwise.
+ */
+static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo)
+{
+ return bo->destroy != i915_ttm_bo_destroy;
+}
+
+/**
+ * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
+ * struct drm_i915_gem_object.
+ * @bo: Pointer to the ttm buffer object
+ *
+ * Return: Pointer to the embedding struct drm_i915_gem_object.
+ */
+static inline struct drm_i915_gem_object *
+i915_ttm_to_gem(struct ttm_buffer_object *bo)
+{
+ return container_of(bo, struct drm_i915_gem_object, __do_not_access);
+}
+
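+/*
+ * Usage sketch (illustrative only; example_notify() is a stand-in): TTM
+ * callbacks must rule out ghost objects before downcasting:
+ *
+ *    static void example_notify(struct ttm_buffer_object *bo)
+ *    {
+ *        struct drm_i915_gem_object *obj;
+ *
+ *        if (i915_ttm_is_ghost_object(bo))
+ *            return;
+ *
+ *        obj = i915_ttm_to_gem(bo);
+ *        ... operate on obj ...
+ *    }
+ */
+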
+int __i915_gem_ttm_object_init(struct intel_memory_region *mem,
+ struct drm_i915_gem_object *obj,
+ resource_size_t offset,
+ resource_size_t size,
+ resource_size_t page_size,
+ unsigned int flags);
+
+/* Internal I915 TTM declarations and definitions below. */
+
+#define I915_PL_LMEM0 TTM_PL_PRIV
+#define I915_PL_SYSTEM TTM_PL_SYSTEM
+#define I915_PL_STOLEN TTM_PL_VRAM
+#define I915_PL_GGTT TTM_PL_TT
+
+struct ttm_placement *i915_ttm_sys_placement(void);
+
+void i915_ttm_free_cached_io_rsgt(struct drm_i915_gem_object *obj);
+
+struct i915_refct_sgt *
+i915_ttm_resource_get_st(struct drm_i915_gem_object *obj,
+ struct ttm_resource *res);
+
+void i915_ttm_adjust_lru(struct drm_i915_gem_object *obj);
+
+int i915_ttm_purge(struct drm_i915_gem_object *obj);
+
+/**
+ * i915_ttm_gtt_binds_lmem - Should the memory be viewed as LMEM by the GTT?
+ * @mem: struct ttm_resource representing the memory.
+ *
+ * Return: true if memory should be viewed as LMEM for GTT binding purposes,
+ * false otherwise.
+ */
+static inline bool i915_ttm_gtt_binds_lmem(struct ttm_resource *mem)
+{
+ return mem->mem_type != I915_PL_SYSTEM;
+}
+
+/**
+ * i915_ttm_cpu_maps_iomem - Should the memory be viewed as IOMEM by the CPU?
+ * @mem: struct ttm_resource representing the memory.
+ *
+ * Return: true if memory should be viewed as IOMEM for CPU mapping purposes.
+ */
+static inline bool i915_ttm_cpu_maps_iomem(struct ttm_resource *mem)
+{
+ /* Once / if we support GGTT, this is also false for cached ttm_tts */
+ return mem && mem->mem_type != I915_PL_SYSTEM;
+}
+
+bool i915_ttm_resource_mappable(struct ttm_resource *res);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
new file mode 100644
index 0000000000..7078af2f8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/ttm/ttm_tt.h>
+
+#include "i915_deps.h"
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+#include "intel_region_ttm.h"
+
+#include "gem/i915_gem_object.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
+
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_migrate.h"
+
+/**
+ * DOC: Selftest failure modes for failsafe migration:
+ *
+ * For fail_gpu_migration, the gpu blit scheduled is always a clear blit
+ * rather than a copy blit, and then we force the failure paths as if
+ * the blit fence returned an error.
+ *
+ * For fail_work_allocation we fail the kmalloc of the async worker and
+ * sync the gpu blit. If it then fails, or fail_gpu_migration is set to
+ * true, a memcpy operation is performed synchronously.
+ */
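+
+/*
+ * A selftest sketch (illustrative only; trigger_object_migration() is a
+ * stand-in for whatever path exercises the migration blit):
+ *
+ *    i915_ttm_migrate_set_failure_modes(true, false);
+ *    err = trigger_object_migration(obj);
+ *    i915_ttm_migrate_set_failure_modes(false, false);
+ */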
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+static bool fail_gpu_migration;
+static bool fail_work_allocation;
+static bool ban_memcpy;
+
+void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+ bool work_allocation)
+{
+ fail_gpu_migration = gpu_migration;
+ fail_work_allocation = work_allocation;
+}
+
+void i915_ttm_migrate_set_ban_memcpy(bool ban)
+{
+ ban_memcpy = ban;
+}
+#endif
+
+static enum i915_cache_level
+i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
+ struct ttm_tt *ttm)
+{
+ return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
+ !i915_ttm_gtt_binds_lmem(res) &&
+ ttm->caching == ttm_cached) ? I915_CACHE_LLC :
+ I915_CACHE_NONE;
+}
+
+static struct intel_memory_region *
+i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
+{
+ struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
+
+ /* There's some room for optimization here... */
+ GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
+ ttm_mem_type < I915_PL_LMEM0);
+ if (ttm_mem_type == I915_PL_SYSTEM)
+ return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
+ 0);
+
+ return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
+ ttm_mem_type - I915_PL_LMEM0);
+}
+
+/**
+ * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a
+ * TTM move
+ * @obj: The gem object
+ */
+void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+
+ if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
+ obj->write_domain = I915_GEM_DOMAIN_WC;
+ obj->read_domains = I915_GEM_DOMAIN_WC;
+ } else {
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ }
+}
+
+/**
+ * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move
+ * @obj: The gem object
+ *
+ * Adjusts the GEM object's region, mem_flags and cache coherency after a
+ * TTM move.
+ */
+void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
+{
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ unsigned int cache_level;
+ unsigned int mem_flags;
+ unsigned int i;
+ int mem_type;
+
+ /*
+ * We might have been purged (or swapped out) if the resource is NULL,
+ * in which case the SYSTEM placement is the closest match to describe
+ * the current domain. If the object is ever used in this state then we
+ * will require moving it again.
+ */
+ if (!bo->resource) {
+ mem_flags = I915_BO_FLAG_STRUCT_PAGE;
+ mem_type = I915_PL_SYSTEM;
+ cache_level = I915_CACHE_NONE;
+ } else {
+ mem_flags = i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
+ I915_BO_FLAG_STRUCT_PAGE;
+ mem_type = bo->resource->mem_type;
+ cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
+ bo->ttm);
+ }
+
+ /*
+ * If object was moved to an allowable region, update the object
+ * region to consider it migrated. Note that if it's currently not
+ * in an allowable region, it's evicted and we don't update the
+ * object region.
+ */
+ if (intel_region_to_ttm_type(obj->mm.region) != mem_type) {
+ for (i = 0; i < obj->mm.n_placements; ++i) {
+ struct intel_memory_region *mr = obj->mm.placements[i];
+
+ if (intel_region_to_ttm_type(mr) == mem_type &&
+ mr != obj->mm.region) {
+ i915_gem_object_release_memory_region(obj);
+ i915_gem_object_init_memory_region(obj, mr);
+ break;
+ }
+ }
+ }
+
+ obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
+ obj->mem_flags |= mem_flags;
+
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+}
+
+/**
+ * i915_ttm_move_notify - Prepare an object for move
+ * @bo: The ttm buffer object.
+ *
+ * This function prepares an object for move by removing all GPU bindings,
+ * removing all CPU mappings and finally releasing the pages sg-table.
+ *
+ * Return: 0 if successful, negative error code on error.
+ */
+int i915_ttm_move_notify(struct ttm_buffer_object *bo)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ int ret;
+
+ /*
+ * Note: The async unbinding here will actually transform the
+ * blocking wait for unbind into a wait before finally submitting
+ * evict / migration blit and thus stall the migration timeline
+ * which may not be good for overall throughput. We should make
+ * sure we await the unbind fences *after* the migration blit
+ * instead of *before* as we currently do.
+ */
+ ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
+ I915_GEM_OBJECT_UNBIND_ASYNC);
+ if (ret)
+ return ret;
+
+ ret = __i915_gem_object_put_pages(obj);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
+ bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct sg_table *dst_st,
+ const struct i915_deps *deps)
+{
+ struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
+ bdev);
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct i915_request *rq;
+ struct ttm_tt *src_ttm = bo->ttm;
+ enum i915_cache_level src_level, dst_level;
+ int ret;
+
+ if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915)))
+ return ERR_PTR(-EINVAL);
+
+ /* With fail_gpu_migration, we always perform a GPU clear. */
+ if (I915_SELFTEST_ONLY(fail_gpu_migration))
+ clear = true;
+
+ dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
+ if (clear) {
+ if (bo->type == ttm_bo_type_kernel &&
+ !I915_SELFTEST_ONLY(fail_gpu_migration))
+ return ERR_PTR(-EINVAL);
+
+ intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
+ ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
+ dst_st->sgl,
+ i915_gem_get_pat_index(i915, dst_level),
+ i915_ttm_gtt_binds_lmem(dst_mem),
+ 0, &rq);
+ } else {
+ struct i915_refct_sgt *src_rsgt =
+ i915_ttm_resource_get_st(obj, bo->resource);
+
+ if (IS_ERR(src_rsgt))
+ return ERR_CAST(src_rsgt);
+
+ src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
+ intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
+ ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
+ deps, src_rsgt->table.sgl,
+ i915_gem_get_pat_index(i915, src_level),
+ i915_ttm_gtt_binds_lmem(bo->resource),
+ dst_st->sgl,
+ i915_gem_get_pat_index(i915, dst_level),
+ i915_ttm_gtt_binds_lmem(dst_mem),
+ &rq);
+
+ i915_refct_sgt_put(src_rsgt);
+ }
+
+ intel_engine_pm_put(to_gt(i915)->migrate.context->engine);
+
+ if (ret && rq) {
+ i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ }
+
+ return ret ? ERR_PTR(ret) : &rq->fence;
+}
+
+/**
+ * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality.
+ * @_dst_iter: Storage space for the destination kmap iterator.
+ * @_src_iter: Storage space for the source kmap iterator.
+ * @dst_iter: Pointer to the destination kmap iterator.
+ * @src_iter: Pointer to the source kmap iterator.
+ * @num_pages: Number of pages
+ * @clear: Whether to clear instead of copy.
+ * @src_rsgt: Refcounted scatter-gather list of source memory.
+ * @dst_rsgt: Refcounted scatter-gather list of destination memory.
+ */
+struct i915_ttm_memcpy_arg {
+ union {
+ struct ttm_kmap_iter_tt tt;
+ struct ttm_kmap_iter_iomap io;
+ } _dst_iter,
+ _src_iter;
+ struct ttm_kmap_iter *dst_iter;
+ struct ttm_kmap_iter *src_iter;
+ unsigned long num_pages;
+ bool clear;
+ struct i915_refct_sgt *src_rsgt;
+ struct i915_refct_sgt *dst_rsgt;
+};
+
+/**
+ * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence.
+ * @fence: The dma-fence.
+ * @work: The work struct used for the memcpy work.
+ * @lock: The fence lock. Not used to protect anything else ATM.
+ * @irq_work: Low latency worker to signal the fence since it can't be done
+ * from the callback for lockdep reasons.
+ * @cb: Callback for the accelerated migration fence.
+ * @arg: The argument for the memcpy functionality.
+ * @i915: The i915 pointer.
+ * @obj: The GEM object.
+ * @memcpy_allowed: If false then, instead of processing @arg and falling back
+ * to memcpy or memset, we wedge the device and set the @obj unknown_state, to
+ * prevent further access to the object with the CPU or GPU. On some devices we
+ * might only be permitted to use the blitter engine for such operations.
+ */
+struct i915_ttm_memcpy_work {
+ struct dma_fence fence;
+ struct work_struct work;
+ spinlock_t lock;
+ struct irq_work irq_work;
+ struct dma_fence_cb cb;
+ struct i915_ttm_memcpy_arg arg;
+ struct drm_i915_private *i915;
+ struct drm_i915_gem_object *obj;
+ bool memcpy_allowed;
+};
+
+static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
+{
+ ttm_move_memcpy(arg->clear, arg->num_pages,
+ arg->dst_iter, arg->src_iter);
+}
+
+static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg,
+ struct ttm_buffer_object *bo, bool clear,
+ struct ttm_resource *dst_mem,
+ struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct intel_memory_region *dst_reg, *src_reg;
+
+ dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
+ src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
+ GEM_BUG_ON(!dst_reg || !src_reg);
+
+ arg->dst_iter = !i915_ttm_cpu_maps_iomem(dst_mem) ?
+ ttm_kmap_iter_tt_init(&arg->_dst_iter.tt, dst_ttm) :
+ ttm_kmap_iter_iomap_init(&arg->_dst_iter.io, &dst_reg->iomap,
+ &dst_rsgt->table, dst_reg->region.start);
+
+ arg->src_iter = !i915_ttm_cpu_maps_iomem(bo->resource) ?
+ ttm_kmap_iter_tt_init(&arg->_src_iter.tt, bo->ttm) :
+ ttm_kmap_iter_iomap_init(&arg->_src_iter.io, &src_reg->iomap,
+ &obj->ttm.cached_io_rsgt->table,
+ src_reg->region.start);
+ arg->clear = clear;
+ arg->num_pages = bo->base.size >> PAGE_SHIFT;
+
+ arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
+ arg->src_rsgt = clear ? NULL :
+ i915_ttm_resource_get_st(obj, bo->resource);
+}
+
+static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg)
+{
+ i915_refct_sgt_put(arg->src_rsgt);
+ i915_refct_sgt_put(arg->dst_rsgt);
+}
+
+static void __memcpy_work(struct work_struct *work)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(work, typeof(*copy_work), work);
+ struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+ bool cookie;
+
+ /*
+ * FIXME: We need to take a closer look here. We should be able to plonk
+ * this into the fence critical section.
+ */
+ if (!copy_work->memcpy_allowed) {
+ struct intel_gt *gt;
+ unsigned int id;
+
+ for_each_gt(gt, copy_work->i915, id)
+ intel_gt_set_wedged(gt);
+ }
+
+ cookie = dma_fence_begin_signalling();
+
+ if (copy_work->memcpy_allowed) {
+ i915_ttm_move_memcpy(arg);
+ } else {
+ /*
+ * Prevent further use of the object. Any future GTT binding or
+ * CPU access is not allowed once we signal the fence. Outside
+ * of the fence critical section, we also wedge the gpu
+ * to indicate the device is not functional.
+ *
+ * The below dma_fence_signal() is our write-memory-barrier.
+ */
+ copy_work->obj->mm.unknown_state = true;
+ }
+
+ dma_fence_end_signalling(cookie);
+
+ dma_fence_signal(&copy_work->fence);
+
+ i915_ttm_memcpy_release(arg);
+ i915_gem_object_put(copy_work->obj);
+ dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_irq_work(struct irq_work *irq_work)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(irq_work, typeof(*copy_work), irq_work);
+ struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
+
+ dma_fence_signal(&copy_work->fence);
+ i915_ttm_memcpy_release(arg);
+ i915_gem_object_put(copy_work->obj);
+ dma_fence_put(&copy_work->fence);
+}
+
+static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct i915_ttm_memcpy_work *copy_work =
+ container_of(cb, typeof(*copy_work), cb);
+
+ if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) {
+ INIT_WORK(&copy_work->work, __memcpy_work);
+ queue_work(system_unbound_wq, &copy_work->work);
+ } else {
+ init_irq_work(&copy_work->irq_work, __memcpy_irq_work);
+ irq_work_queue(&copy_work->irq_work);
+ }
+}
+
+static const char *get_driver_name(struct dma_fence *fence)
+{
+ return "i915_ttm_memcpy_work";
+}
+
+static const char *get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+static const struct dma_fence_ops dma_fence_memcpy_ops = {
+ .get_driver_name = get_driver_name,
+ .get_timeline_name = get_timeline_name,
+};
+
+static struct dma_fence *
+i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
+ struct dma_fence *dep)
+{
+ int ret;
+
+ spin_lock_init(&work->lock);
+ dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0);
+ dma_fence_get(&work->fence);
+ ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb);
+ if (ret) {
+ if (ret != -ENOENT)
+ dma_fence_wait(dep, false);
+
+ return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL :
+ dep->error);
+ }
+
+ return &work->fence;
+}
+
+static bool i915_ttm_memcpy_allowed(struct ttm_buffer_object *bo,
+ struct ttm_resource *dst_mem)
+{
+ if (i915_gem_object_needs_ccs_pages(i915_ttm_to_gem(bo)))
+ return false;
+
+ if (!(i915_ttm_resource_mappable(bo->resource) &&
+ i915_ttm_resource_mappable(dst_mem)))
+ return false;
+
+ return I915_SELFTEST_ONLY(ban_memcpy) ? false : true;
+}
+
+static struct dma_fence *
+__i915_ttm_move(struct ttm_buffer_object *bo,
+ const struct ttm_operation_ctx *ctx, bool clear,
+ struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
+ struct i915_refct_sgt *dst_rsgt, bool allow_accel,
+ const struct i915_deps *move_deps)
+{
+ const bool memcpy_allowed = i915_ttm_memcpy_allowed(bo, dst_mem);
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct drm_i915_private *i915 = to_i915(bo->base.dev);
+ struct i915_ttm_memcpy_work *copy_work = NULL;
+ struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
+ struct dma_fence *fence = ERR_PTR(-EINVAL);
+
+ if (allow_accel) {
+ fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
+ &dst_rsgt->table, move_deps);
+
+ /*
+ * We only need to intercept the error when moving to lmem.
+ * When moving to system, TTM or shmem will provide us with
+ * cleared pages.
+ */
+ if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) &&
+ !I915_SELFTEST_ONLY(fail_gpu_migration ||
+ fail_work_allocation))
+ goto out;
+ }
+
+ /* If we've scheduled gpu migration, try to arm the error intercept. */
+ if (!IS_ERR(fence)) {
+ struct dma_fence *dep = fence;
+
+ if (!I915_SELFTEST_ONLY(fail_work_allocation))
+ copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);
+
+ if (copy_work) {
+ copy_work->i915 = i915;
+ copy_work->memcpy_allowed = memcpy_allowed;
+ copy_work->obj = i915_gem_object_get(obj);
+ arg = &copy_work->arg;
+ if (memcpy_allowed)
+ i915_ttm_memcpy_init(arg, bo, clear, dst_mem,
+ dst_ttm, dst_rsgt);
+
+ fence = i915_ttm_memcpy_work_arm(copy_work, dep);
+ } else {
+ dma_fence_wait(dep, false);
+ fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ?
+ -EINVAL : fence->error);
+ }
+ dma_fence_put(dep);
+
+ if (!IS_ERR(fence))
+ goto out;
+ } else {
+ int err = PTR_ERR(fence);
+
+ if (err == -EINTR || err == -ERESTARTSYS || err == -EAGAIN)
+ return fence;
+
+ if (move_deps) {
+ err = i915_deps_sync(move_deps, ctx);
+ if (err)
+ return ERR_PTR(err);
+ }
+ }
+
+ /* Error intercept failed or no accelerated migration to start with */
+
+ if (memcpy_allowed) {
+ if (!copy_work)
+ i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
+ dst_rsgt);
+ i915_ttm_move_memcpy(arg);
+ i915_ttm_memcpy_release(arg);
+ }
+ if (copy_work)
+ i915_gem_object_put(copy_work->obj);
+ kfree(copy_work);
+
+ return memcpy_allowed ? NULL : ERR_PTR(-EIO);
+out:
+ if (!fence && copy_work) {
+ i915_ttm_memcpy_release(arg);
+ i915_gem_object_put(copy_work->obj);
+ kfree(copy_work);
+ }
+
+ return fence;
+}
+
+/**
+ * i915_ttm_move - The TTM move callback used by i915.
+ * @bo: The buffer object.
+ * @evict: Whether this is an eviction.
+ * @ctx: Pointer to a struct ttm_operation_ctx indicating how the waits should be
+ * performed if waiting.
+ * @dst_mem: The destination ttm resource.
+ * @hop: If we need multihop, what temporary memory type to move to.
+ *
+ * Return: 0 if successful, negative error code otherwise.
+ */
+int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *dst_mem,
+ struct ttm_place *hop)
+{
+ struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+ struct ttm_resource_manager *dst_man =
+ ttm_manager_type(bo->bdev, dst_mem->mem_type);
+ struct dma_fence *migration_fence = NULL;
+ struct ttm_tt *ttm = bo->ttm;
+ struct i915_refct_sgt *dst_rsgt;
+ bool clear, prealloc_bo;
+ int ret;
+
+ if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
+ ttm_bo_move_null(bo, dst_mem);
+ return 0;
+ }
+
+ if (!bo->resource) {
+ if (dst_mem->mem_type != TTM_PL_SYSTEM) {
+ hop->mem_type = TTM_PL_SYSTEM;
+ hop->flags = TTM_PL_FLAG_TEMPORARY;
+ return -EMULTIHOP;
+ }
+
+ /*
+ * This is only reached when first creating the object, or if
+ * the object was purged or swapped out (pipeline-gutting). For
+ * the former we can safely skip all of the below since we are
+ * only using a dummy SYSTEM placement here. And with the latter
+ * we will always re-enter here with bo->resource set correctly
+ * (as per the above), since this is part of a multi-hop
+ * sequence, where at the end we can do the move for real.
+ *
+ * The special case here is when the dst_mem is TTM_PL_SYSTEM,
+ * which doesn't require any kind of move, so it should be safe
+ * to skip all the below and call ttm_bo_move_null() here, where
+ * the caller in __i915_ttm_get_pages() will take care of the
+ * rest, since we should have a valid ttm_tt.
+ */
+ ttm_bo_move_null(bo, dst_mem);
+ return 0;
+ }
+
+ ret = i915_ttm_move_notify(bo);
+ if (ret)
+ return ret;
+
+ if (obj->mm.madv != I915_MADV_WILLNEED) {
+ i915_ttm_purge(obj);
+ ttm_resource_free(bo, &dst_mem);
+ return 0;
+ }
+
+ /* Populate ttm with pages if needed. Typically system memory. */
+ if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
+ ret = ttm_tt_populate(bo->bdev, ttm, ctx);
+ if (ret)
+ return ret;
+ }
+
+ dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
+ if (IS_ERR(dst_rsgt))
+ return PTR_ERR(dst_rsgt);
+
+ clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
+ prealloc_bo = obj->flags & I915_BO_PREALLOC;
+ if (!(clear && ttm && !((ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) && !prealloc_bo))) {
+ struct i915_deps deps;
+
+ i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+ ret = i915_deps_add_resv(&deps, bo->base.resv, ctx);
+ if (ret) {
+ i915_refct_sgt_put(dst_rsgt);
+ return ret;
+ }
+
+ migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, ttm,
+ dst_rsgt, true, &deps);
+ i915_deps_fini(&deps);
+ }
+
+ /* We can possibly get an -ERESTARTSYS here */
+ if (IS_ERR(migration_fence)) {
+ i915_refct_sgt_put(dst_rsgt);
+ return PTR_ERR(migration_fence);
+ }
+
+ if (migration_fence) {
+ if (I915_SELFTEST_ONLY(evict && fail_gpu_migration))
+ ret = -EIO; /* never feed non-migrate fences into ttm */
+ else
+ ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict,
+ true, dst_mem);
+ if (ret) {
+ dma_fence_wait(migration_fence, false);
+ ttm_bo_move_sync_cleanup(bo, dst_mem);
+ }
+ dma_fence_put(migration_fence);
+ } else {
+ ttm_bo_move_sync_cleanup(bo, dst_mem);
+ }
+
+ i915_ttm_adjust_domains_after_move(obj);
+ i915_ttm_free_cached_io_rsgt(obj);
+
+ if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) {
+ obj->ttm.cached_io_rsgt = dst_rsgt;
+ obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
+ obj->ttm.get_io_page.sg_idx = 0;
+ } else {
+ i915_refct_sgt_put(dst_rsgt);
+ }
+
+ i915_ttm_adjust_lru(obj);
+ i915_ttm_adjust_gem_after_move(obj);
+ return 0;
+}
+
+/**
+ * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
+ * another
+ * @dst: The destination object
+ * @src: The source object
+ * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
+ * @intr: Whether to perform waits interruptibly.
+ *
+ * Note: The caller is responsible for ensuring that the underlying
+ * TTM objects are populated if needed and locked.
+ *
+ * Return: Zero on success. Negative error code on error. If @intr == true,
+ * then it may return -ERESTARTSYS or -EINTR.
+ */
+int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
+ struct drm_i915_gem_object *src,
+ bool allow_accel, bool intr)
+{
+ struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
+ struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
+ struct ttm_operation_ctx ctx = {
+ .interruptible = intr,
+ };
+ struct i915_refct_sgt *dst_rsgt;
+ struct dma_fence *copy_fence;
+ struct i915_deps deps;
+ int ret;
+
+ assert_object_held(dst);
+ assert_object_held(src);
+
+ if (GEM_WARN_ON(!src_bo->resource || !dst_bo->resource))
+ return -EINVAL;
+
+ i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
+
+ ret = dma_resv_reserve_fences(src_bo->base.resv, 1);
+ if (ret)
+ return ret;
+
+ ret = dma_resv_reserve_fences(dst_bo->base.resv, 1);
+ if (ret)
+ return ret;
+
+ ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx);
+ if (ret)
+ return ret;
+
+ ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx);
+ if (ret)
+ return ret;
+
+ dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
+ copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
+ dst_bo->ttm, dst_rsgt, allow_accel,
+ &deps);
+
+ i915_deps_fini(&deps);
+ i915_refct_sgt_put(dst_rsgt);
+ if (IS_ERR_OR_NULL(copy_fence))
+ return PTR_ERR_OR_ZERO(copy_fence);
+
+ dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE);
+ dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ);
+ dma_fence_put(copy_fence);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
new file mode 100644
index 0000000000..8a5d5ab0cc
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+#ifndef _I915_GEM_TTM_MOVE_H_
+#define _I915_GEM_TTM_MOVE_H_
+
+#include <linux/types.h>
+
+#include "i915_selftest.h"
+
+struct ttm_buffer_object;
+struct ttm_operation_ctx;
+struct ttm_place;
+struct ttm_resource;
+struct ttm_tt;
+
+struct drm_i915_gem_object;
+struct i915_refct_sgt;
+
+int i915_ttm_move_notify(struct ttm_buffer_object *bo);
+
+I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
+ bool work_allocation));
+I915_SELFTEST_DECLARE(void i915_ttm_migrate_set_ban_memcpy(bool ban));
+
+int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
+ struct drm_i915_gem_object *src,
+ bool allow_accel, bool intr);
+
+/* Internal I915 TTM declarations and definitions below. */
+
+int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
+ struct ttm_operation_ctx *ctx,
+ struct ttm_resource *dst_mem,
+ struct ttm_place *hop);
+
+void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj);
+
+void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
new file mode 100644
index 0000000000..ad649523d5
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.c
@@ -0,0 +1,232 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/ttm/ttm_tt.h>
+
+#include "i915_drv.h"
+#include "intel_memory_region.h"
+#include "intel_region_ttm.h"
+
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
+#include "gem/i915_gem_ttm_pm.h"
+
+/**
+ * i915_ttm_backup_free - Free any backup attached to this object
+ * @obj: The object whose backup is to be freed.
+ */
+void i915_ttm_backup_free(struct drm_i915_gem_object *obj)
+{
+ if (obj->ttm.backup) {
+ i915_gem_object_put(obj->ttm.backup);
+ obj->ttm.backup = NULL;
+ }
+}
+
+/**
+ * struct i915_gem_ttm_pm_apply - Apply-to-region subclass for restore
+ * @base: The i915_gem_apply_to_region we derive from.
+ * @allow_gpu: Whether using the gpu blitter is allowed.
+ * @backup_pinned: On backup, backup also pinned objects.
+ */
+struct i915_gem_ttm_pm_apply {
+ struct i915_gem_apply_to_region base;
+ bool allow_gpu : 1;
+ bool backup_pinned : 1;
+};
+
+static int i915_ttm_backup(struct i915_gem_apply_to_region *apply,
+ struct drm_i915_gem_object *obj)
+{
+ struct i915_gem_ttm_pm_apply *pm_apply =
+ container_of(apply, typeof(*pm_apply), base);
+ struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ struct ttm_buffer_object *backup_bo;
+ struct drm_i915_private *i915 =
+ container_of(bo->bdev, typeof(*i915), bdev);
+ struct drm_i915_gem_object *backup;
+ struct ttm_operation_ctx ctx = {};
+ unsigned int flags;
+ int err = 0;
+
+ if (!i915_ttm_cpu_maps_iomem(bo->resource) || obj->ttm.backup)
+ return 0;
+
+ if (pm_apply->allow_gpu && i915_gem_object_evictable(obj))
+ return ttm_bo_validate(bo, i915_ttm_sys_placement(), &ctx);
+
+ if (!pm_apply->backup_pinned ||
+ (pm_apply->allow_gpu && (obj->flags & I915_BO_ALLOC_PM_EARLY)))
+ return 0;
+
+ if (obj->flags & I915_BO_ALLOC_PM_VOLATILE)
+ return 0;
+
+ /*
+ * It seems that we might have some framebuffers still pinned at this
+ * stage, but for such objects we might also need to deal with the CCS
+	 * aux state. Make sure we force the save/restore of the CCS state;
+	 * otherwise we might observe display corruption when returning from
+	 * suspend.
+ */
+ flags = 0;
+ if (i915_gem_object_needs_ccs_pages(obj)) {
+ WARN_ON_ONCE(!i915_gem_object_is_framebuffer(obj));
+ WARN_ON_ONCE(!pm_apply->allow_gpu);
+
+ flags = I915_BO_ALLOC_CCS_AUX;
+ }
+ backup = i915_gem_object_create_region(i915->mm.regions[INTEL_REGION_SMEM],
+ obj->base.size, 0, flags);
+ if (IS_ERR(backup))
+ return PTR_ERR(backup);
+
+ err = i915_gem_object_lock(backup, apply->ww);
+ if (err)
+ goto out_no_lock;
+
+ backup_bo = i915_gem_to_ttm(backup);
+ err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, &ctx);
+ if (err)
+ goto out_no_populate;
+
+ err = i915_gem_obj_copy_ttm(backup, obj, pm_apply->allow_gpu, false);
+ if (err) {
+ drm_err(&i915->drm,
+ "Unable to copy from device to system memory, err:%pe\n",
+ ERR_PTR(err));
+ goto out_no_populate;
+ }
+ ttm_bo_wait_ctx(backup_bo, &ctx);
+
+ obj->ttm.backup = backup;
+ return 0;
+
+out_no_populate:
+ i915_gem_ww_unlock_single(backup);
+out_no_lock:
+ i915_gem_object_put(backup);
+
+ return err;
+}
+
+static int i915_ttm_recover(struct i915_gem_apply_to_region *apply,
+ struct drm_i915_gem_object *obj)
+{
+ i915_ttm_backup_free(obj);
+ return 0;
+}
+
+/**
+ * i915_ttm_recover_region - Free the backup of all objects of a region
+ * @mr: The memory region
+ *
+ * Checks all objects of a region for an attached backup and, if present,
+ * frees that backup. Typically this is called to recover after a partially
+ * performed backup.
+ */
+void i915_ttm_recover_region(struct intel_memory_region *mr)
+{
+ static const struct i915_gem_apply_to_region_ops recover_ops = {
+ .process_obj = i915_ttm_recover,
+ };
+ struct i915_gem_apply_to_region apply = {.ops = &recover_ops};
+ int ret;
+
+ ret = i915_gem_process_region(mr, &apply);
+ GEM_WARN_ON(ret);
+}
+
+/**
+ * i915_ttm_backup_region - Back up all objects of a region to smem.
+ * @mr: The memory region
+ * @flags: TTM backup flags
+ *
+ * Loops over all objects of a region and either evicts them if they are
+ * evictable or backs them up using a backup object if they are pinned.
+ *
+ * Return: Zero on success. Negative error code on error.
+ */
+int i915_ttm_backup_region(struct intel_memory_region *mr, u32 flags)
+{
+ static const struct i915_gem_apply_to_region_ops backup_ops = {
+ .process_obj = i915_ttm_backup,
+ };
+ struct i915_gem_ttm_pm_apply pm_apply = {
+ .base = {.ops = &backup_ops},
+ .allow_gpu = flags & I915_TTM_BACKUP_ALLOW_GPU,
+ .backup_pinned = flags & I915_TTM_BACKUP_PINNED,
+ };
+
+ return i915_gem_process_region(mr, &pm_apply.base);
+}
+
+static int i915_ttm_restore(struct i915_gem_apply_to_region *apply,
+ struct drm_i915_gem_object *obj)
+{
+ struct i915_gem_ttm_pm_apply *pm_apply =
+ container_of(apply, typeof(*pm_apply), base);
+ struct drm_i915_gem_object *backup = obj->ttm.backup;
+ struct ttm_buffer_object *backup_bo = i915_gem_to_ttm(backup);
+ struct ttm_operation_ctx ctx = {};
+ int err;
+
+ if (!backup)
+ return 0;
+
+ if (!pm_apply->allow_gpu && !(obj->flags & I915_BO_ALLOC_PM_EARLY))
+ return 0;
+
+ err = i915_gem_object_lock(backup, apply->ww);
+ if (err)
+ return err;
+
+ /* Content may have been swapped. */
+ if (!backup_bo->resource)
+ err = ttm_bo_validate(backup_bo, i915_ttm_sys_placement(), &ctx);
+ if (!err)
+ err = ttm_tt_populate(backup_bo->bdev, backup_bo->ttm, &ctx);
+ if (!err) {
+ err = i915_gem_obj_copy_ttm(obj, backup, pm_apply->allow_gpu,
+ false);
+ GEM_WARN_ON(err);
+ ttm_bo_wait_ctx(backup_bo, &ctx);
+
+ obj->ttm.backup = NULL;
+ err = 0;
+ }
+
+ i915_gem_ww_unlock_single(backup);
+
+ if (!err)
+ i915_gem_object_put(backup);
+
+ return err;
+}
+
+/**
+ * i915_ttm_restore_region - Restore backed-up objects of a region from smem.
+ * @mr: The memory region
+ * @flags: TTM backup flags
+ *
+ * Loops over all objects of a region and if they are backed-up, restores
+ * them from smem.
+ *
+ * Return: Zero on success. Negative error code on error.
+ */
+int i915_ttm_restore_region(struct intel_memory_region *mr, u32 flags)
+{
+ static const struct i915_gem_apply_to_region_ops restore_ops = {
+ .process_obj = i915_ttm_restore,
+ };
+ struct i915_gem_ttm_pm_apply pm_apply = {
+ .base = {.ops = &restore_ops},
+ .allow_gpu = flags & I915_TTM_BACKUP_ALLOW_GPU,
+ };
+
+ return i915_gem_process_region(mr, &pm_apply.base);
+}
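+
+/*
+ * A minimal usage sketch of the helpers above, assuming a suspend path that
+ * walks the driver's memory regions; the for_each_memory_region() iterator
+ * and the caller below are illustrative only:
+ *
+ *	static int backup_all_regions(struct drm_i915_private *i915, u32 flags)
+ *	{
+ *		struct intel_memory_region *mr;
+ *		int id, ret;
+ *
+ *		for_each_memory_region(mr, i915, id) {
+ *			ret = i915_ttm_backup_region(mr, flags);
+ *			if (ret) {
+ *				i915_ttm_recover_region(mr);
+ *				return ret;
+ *			}
+ *		}
+ *		return 0;
+ *	}
+ *
+ * Resume would then call i915_ttm_restore_region() on each region, which
+ * copies the contents back and drops the backup objects.
+ */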
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h
new file mode 100644
index 0000000000..25ed67a315
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_pm.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef _I915_GEM_TTM_PM_H_
+#define _I915_GEM_TTM_PM_H_
+
+#include <linux/types.h>
+
+struct intel_memory_region;
+struct drm_i915_gem_object;
+
+#define I915_TTM_BACKUP_ALLOW_GPU BIT(0)
+#define I915_TTM_BACKUP_PINNED BIT(1)
+
+int i915_ttm_backup_region(struct intel_memory_region *mr, u32 flags);
+
+void i915_ttm_recover_region(struct intel_memory_region *mr);
+
+int i915_ttm_restore_region(struct intel_memory_region *mr, u32 flags);
+
+/* Internal I915 TTM functions below. */
+void i915_ttm_backup_free(struct drm_i915_gem_object *obj);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
new file mode 100644
index 0000000000..1d3ebdf406
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -0,0 +1,594 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2012-2014 Intel Corporation
+ *
+ * Based on amdgpu_mn, which bears the following notice:
+ *
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ * Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/mmu_context.h>
+#include <linux/mempolicy.h>
+#include <linux/swap.h>
+#include <linux/sched/mm.h>
+
+#include "i915_drv.h"
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+#include "i915_gem_userptr.h"
+#include "i915_scatterlist.h"
+
+#ifdef CONFIG_MMU_NOTIFIER
+
+/**
+ * i915_gem_userptr_invalidate - callback to notify about mm change
+ *
+ * @mni: the mmu_interval_notifier for the range that is about to be updated
+ * @range: details on the invalidation
+ * @cur_seq: Value to pass to mmu_interval_set_seq()
+ *
+ * Block for operations on BOs to finish and mark pages as accessed and
+ * potentially dirty.
+ */
+static bool i915_gem_userptr_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct drm_i915_gem_object *obj = container_of(mni, struct drm_i915_gem_object, userptr.notifier);
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ long r;
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ write_lock(&i915->mm.notifier_lock);
+
+ mmu_interval_set_seq(mni, cur_seq);
+
+ write_unlock(&i915->mm.notifier_lock);
+
+ /*
+ * We don't wait when the process is exiting. This is valid
+ * because the object will be cleaned up anyway.
+ *
+ * This is also temporarily required as a hack, because we
+ * cannot currently force non-consistent batch buffers to preempt
+	 * and reschedule by waiting on them, which would otherwise hang
+	 * processes on exit.
+ */
+ if (current->flags & PF_EXITING)
+ return true;
+
+ /* we will unbind on next submission, still have userptr pins */
+ r = dma_resv_wait_timeout(obj->base.resv, DMA_RESV_USAGE_BOOKKEEP, false,
+ MAX_SCHEDULE_TIMEOUT);
+ if (r <= 0)
+ drm_err(&i915->drm, "(%ld) failed to wait for idle\n", r);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops i915_gem_userptr_notifier_ops = {
+ .invalidate = i915_gem_userptr_invalidate,
+};
+
+static int
+i915_gem_userptr_init__mmu_notifier(struct drm_i915_gem_object *obj)
+{
+ return mmu_interval_notifier_insert(&obj->userptr.notifier, current->mm,
+ obj->userptr.ptr, obj->base.size,
+ &i915_gem_userptr_notifier_ops);
+}
+
+static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
+{
+ struct page **pvec = NULL;
+
+ assert_object_held_shared(obj);
+
+ if (!--obj->userptr.page_ref) {
+ pvec = obj->userptr.pvec;
+ obj->userptr.pvec = NULL;
+ }
+ GEM_BUG_ON(obj->userptr.page_ref < 0);
+
+ if (pvec) {
+ const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
+
+ unpin_user_pages(pvec, num_pages);
+ kvfree(pvec);
+ }
+}
+
+static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
+{
+ unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev);
+ struct sg_table *st;
+ struct page **pvec;
+ unsigned int num_pages; /* limited by sg_alloc_table_from_pages_segment */
+ int ret;
+
+ if (overflows_type(obj->base.size >> PAGE_SHIFT, num_pages))
+ return -E2BIG;
+
+ num_pages = obj->base.size >> PAGE_SHIFT;
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+
+ if (!obj->userptr.page_ref) {
+ ret = -EAGAIN;
+ goto err_free;
+ }
+
+ obj->userptr.page_ref++;
+ pvec = obj->userptr.pvec;
+
+alloc_table:
+ ret = sg_alloc_table_from_pages_segment(st, pvec, num_pages, 0,
+ num_pages << PAGE_SHIFT,
+ max_segment, GFP_KERNEL);
+ if (ret)
+ goto err;
+
+ ret = i915_gem_gtt_prepare_pages(obj, st);
+ if (ret) {
+ sg_free_table(st);
+
+ if (max_segment > PAGE_SIZE) {
+ max_segment = PAGE_SIZE;
+ goto alloc_table;
+ }
+
+ goto err;
+ }
+
+ WARN_ON_ONCE(!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE));
+ if (i915_gem_object_can_bypass_llc(obj))
+ obj->cache_dirty = true;
+
+ __i915_gem_object_set_pages(obj, st);
+
+ return 0;
+
+err:
+ i915_gem_object_userptr_drop_ref(obj);
+err_free:
+ kfree(st);
+ return ret;
+}
+
+static void
+i915_gem_userptr_put_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ struct sgt_iter sgt_iter;
+ struct page *page;
+
+ if (!pages)
+ return;
+
+ __i915_gem_object_release_shmem(obj, pages, true);
+ i915_gem_gtt_finish_pages(obj, pages);
+
+ /*
+ * We always mark objects as dirty when they are used by the GPU,
+ * just in case. However, if we set the vma as being read-only we know
+ * that the object will never have been written to.
+ */
+ if (i915_gem_object_is_readonly(obj))
+ obj->mm.dirty = false;
+
+ for_each_sgt_page(page, sgt_iter, pages) {
+ if (obj->mm.dirty && trylock_page(page)) {
+ /*
+ * As this may not be anonymous memory (e.g. shmem)
+ * but exist on a real mapping, we have to lock
+ * the page in order to dirty it -- holding
+ * the page reference is not sufficient to
+ * prevent the inode from being truncated.
+ * Play safe and take the lock.
+ *
+ * However...!
+ *
+ * The mmu-notifier can be invalidated for a
+			 * migrate_folio that is already holding the lock
+ * on the folio. Such a try_to_unmap() will result
+ * in us calling put_pages() and so recursively try
+ * to lock the page. We avoid that deadlock with
+ * a trylock_page() and in exchange we risk missing
+ * some page dirtying.
+ */
+ set_page_dirty(page);
+ unlock_page(page);
+ }
+
+ mark_page_accessed(page);
+ }
+ obj->mm.dirty = false;
+
+ sg_free_table(pages);
+ kfree(pages);
+
+ i915_gem_object_userptr_drop_ref(obj);
+}
+
+static int i915_gem_object_userptr_unbind(struct drm_i915_gem_object *obj)
+{
+ struct sg_table *pages;
+ int err;
+
+ err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ if (err)
+ return err;
+
+ if (GEM_WARN_ON(i915_gem_object_has_pinned_pages(obj)))
+ return -EBUSY;
+
+ assert_object_held(obj);
+
+ pages = __i915_gem_object_unset_pages(obj);
+ if (!IS_ERR_OR_NULL(pages))
+ i915_gem_userptr_put_pages(obj, pages);
+
+ return err;
+}
+
+int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
+{
+ const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
+ struct page **pvec;
+ unsigned int gup_flags = 0;
+ unsigned long notifier_seq;
+ int pinned, ret;
+
+ if (obj->userptr.notifier.mm != current->mm)
+ return -EFAULT;
+
+ notifier_seq = mmu_interval_read_begin(&obj->userptr.notifier);
+
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
+ if (ret)
+ return ret;
+
+ if (notifier_seq == obj->userptr.notifier_seq && obj->userptr.pvec) {
+ i915_gem_object_unlock(obj);
+ return 0;
+ }
+
+ ret = i915_gem_object_userptr_unbind(obj);
+ i915_gem_object_unlock(obj);
+ if (ret)
+ return ret;
+
+ pvec = kvmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL);
+ if (!pvec)
+ return -ENOMEM;
+
+ if (!i915_gem_object_is_readonly(obj))
+ gup_flags |= FOLL_WRITE;
+
+ pinned = 0;
+ while (pinned < num_pages) {
+ ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
+ num_pages - pinned, gup_flags,
+ &pvec[pinned]);
+ if (ret < 0)
+ goto out;
+
+ pinned += ret;
+ }
+
+ ret = i915_gem_object_lock_interruptible(obj, NULL);
+ if (ret)
+ goto out;
+
+ if (mmu_interval_read_retry(&obj->userptr.notifier,
+ !obj->userptr.page_ref ? notifier_seq :
+ obj->userptr.notifier_seq)) {
+ ret = -EAGAIN;
+ goto out_unlock;
+ }
+
+ if (!obj->userptr.page_ref++) {
+ obj->userptr.pvec = pvec;
+ obj->userptr.notifier_seq = notifier_seq;
+ pvec = NULL;
+ ret = ____i915_gem_object_get_pages(obj);
+ }
+
+ obj->userptr.page_ref--;
+
+out_unlock:
+ i915_gem_object_unlock(obj);
+
+out:
+ if (pvec) {
+ unpin_user_pages(pvec, pinned);
+ kvfree(pvec);
+ }
+
+ return ret;
+}
+
+int i915_gem_object_userptr_submit_done(struct drm_i915_gem_object *obj)
+{
+ if (mmu_interval_read_retry(&obj->userptr.notifier,
+ obj->userptr.notifier_seq)) {
+ /* We collided with the mmu notifier, need to retry */
+
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+int i915_gem_object_userptr_validate(struct drm_i915_gem_object *obj)
+{
+ int err;
+
+ err = i915_gem_object_userptr_submit_init(obj);
+ if (err)
+ return err;
+
+ err = i915_gem_object_lock_interruptible(obj, NULL);
+ if (!err) {
+ /*
+ * Since we only check validity, not use the pages,
+ * it doesn't matter if we collide with the mmu notifier,
+ * and -EAGAIN handling is not required.
+ */
+ err = i915_gem_object_pin_pages(obj);
+ if (!err)
+ i915_gem_object_unpin_pages(obj);
+
+ i915_gem_object_unlock(obj);
+ }
+
+ return err;
+}
+
+static void
+i915_gem_userptr_release(struct drm_i915_gem_object *obj)
+{
+ GEM_WARN_ON(obj->userptr.page_ref);
+
+ mmu_interval_notifier_remove(&obj->userptr.notifier);
+ obj->userptr.notifier.mm = NULL;
+}
+
+static int
+i915_gem_userptr_dmabuf_export(struct drm_i915_gem_object *obj)
+{
+ drm_dbg(obj->base.dev, "Exporting userptr no longer allowed\n");
+
+ return -EINVAL;
+}
+
+static int
+i915_gem_userptr_pwrite(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pwrite *args)
+{
+ drm_dbg(obj->base.dev, "pwrite to userptr no longer allowed\n");
+
+ return -EINVAL;
+}
+
+static int
+i915_gem_userptr_pread(struct drm_i915_gem_object *obj,
+ const struct drm_i915_gem_pread *args)
+{
+ drm_dbg(obj->base.dev, "pread from userptr no longer allowed\n");
+
+ return -EINVAL;
+}
+
+static const struct drm_i915_gem_object_ops i915_gem_userptr_ops = {
+ .name = "i915_gem_object_userptr",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE |
+ I915_GEM_OBJECT_NO_MMAP |
+ I915_GEM_OBJECT_IS_PROXY,
+ .get_pages = i915_gem_userptr_get_pages,
+ .put_pages = i915_gem_userptr_put_pages,
+ .dmabuf_export = i915_gem_userptr_dmabuf_export,
+ .pwrite = i915_gem_userptr_pwrite,
+ .pread = i915_gem_userptr_pread,
+ .release = i915_gem_userptr_release,
+};
+
+#endif
+
+static int
+probe_range(struct mm_struct *mm, unsigned long addr, unsigned long len)
+{
+ VMA_ITERATOR(vmi, mm, addr);
+ struct vm_area_struct *vma;
+ unsigned long end = addr + len;
+
+ mmap_read_lock(mm);
+ for_each_vma_range(vmi, vma, end) {
+ /* Check for holes, note that we also update the addr below */
+ if (vma->vm_start > addr)
+ break;
+
+ if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+ break;
+
+ addr = vma->vm_end;
+ }
+ mmap_read_unlock(mm);
+
+ if (vma || addr < end)
+ return -EFAULT;
+ return 0;
+}
+
+/*
+ * Creates a new mm object that wraps some normal memory from the process
+ * context - user memory.
+ *
+ * We impose several restrictions upon the memory being mapped
+ * into the GPU.
+ * 1. It must be page aligned (both start/end addresses, i.e. ptr and size).
+ * 2. It must be normal system memory, not a pointer into another map of IO
+ * space (e.g. it must not be a GTT mmapping of another object).
+ * 3. We only allow a bo as large as we could in theory map into the GTT,
+ * that is we limit the size to the total size of the GTT.
+ * 4. The bo is marked as being snoopable. The backing pages are left
+ * accessible directly by the CPU, but reads and writes by the GPU may
+ * incur the cost of a snoop (unless you have an LLC architecture).
+ *
+ * Synchronisation between multiple users and the GPU is left to userspace
+ * through the normal set-domain-ioctl. The kernel will enforce that the
+ * GPU relinquishes the VMA before it is returned to the system,
+ * i.e. upon free(), munmap() or process termination. However, the userspace
+ * malloc() library may not immediately relinquish the VMA after free() and
+ * instead reuse it whilst the GPU is still reading and writing to the VMA.
+ * Caveat emptor.
+ *
+ * Also note that the object created here is not currently a "first class"
+ * object, in that several ioctls are banned. These are the CPU access
+ * ioctls: mmap(), pwrite and pread. In practice, you are expected to use
+ * direct access via your pointer rather than use those ioctls. Another
+ * restriction is that we do not allow userptr surfaces to be pinned to the
+ * hardware and so we reject any attempt to create a framebuffer out of a
+ * userptr.
+ *
+ * If you think this is a good interface to use to pass GPU memory between
+ * drivers, please use dma-buf instead. In fact, wherever possible use
+ * dma-buf instead.
+ */
+int
+i915_gem_userptr_ioctl(struct drm_device *dev,
+ void *data,
+ struct drm_file *file)
+{
+ static struct lock_class_key __maybe_unused lock_class;
+ struct drm_i915_private *dev_priv = to_i915(dev);
+ struct drm_i915_gem_userptr *args = data;
+ struct drm_i915_gem_object __maybe_unused *obj;
+ int __maybe_unused ret;
+ u32 __maybe_unused handle;
+
+ if (!HAS_LLC(dev_priv) && !HAS_SNOOP(dev_priv)) {
+		/* We cannot support coherent userptr objects on hw that lacks
+		 * both LLC and snooping.
+ */
+ return -ENODEV;
+ }
+
+ if (args->flags & ~(I915_USERPTR_READ_ONLY |
+ I915_USERPTR_UNSYNCHRONIZED |
+ I915_USERPTR_PROBE))
+ return -EINVAL;
+
+ if (i915_gem_object_size_2big(args->user_size))
+ return -E2BIG;
+
+ if (!args->user_size)
+ return -EINVAL;
+
+ if (offset_in_page(args->user_ptr | args->user_size))
+ return -EINVAL;
+
+ if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
+ return -EFAULT;
+
+ if (args->flags & I915_USERPTR_UNSYNCHRONIZED)
+ return -ENODEV;
+
+ if (args->flags & I915_USERPTR_READ_ONLY) {
+ /*
+ * On almost all of the older hw, we cannot tell the GPU that
+ * a page is readonly.
+ */
+ if (!to_gt(dev_priv)->vm->has_read_only)
+ return -ENODEV;
+ }
+
+ if (args->flags & I915_USERPTR_PROBE) {
+ /*
+ * Check that the range pointed to represents real struct
+ * pages and not iomappings (at this moment in time!)
+ */
+ ret = probe_range(current->mm, args->user_ptr, args->user_size);
+ if (ret)
+ return ret;
+ }
+
+#ifdef CONFIG_MMU_NOTIFIER
+ obj = i915_gem_object_alloc();
+ if (obj == NULL)
+ return -ENOMEM;
+
+ drm_gem_private_object_init(dev, &obj->base, args->user_size);
+ i915_gem_object_init(obj, &i915_gem_userptr_ops, &lock_class,
+ I915_BO_ALLOC_USER);
+ obj->mem_flags = I915_BO_FLAG_STRUCT_PAGE;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);
+
+ obj->userptr.ptr = args->user_ptr;
+ obj->userptr.notifier_seq = ULONG_MAX;
+ if (args->flags & I915_USERPTR_READ_ONLY)
+ i915_gem_object_set_readonly(obj);
+
+ /* And keep a pointer to the current->mm for resolving the user pages
+ * at binding. This means that we need to hook into the mmu_notifier
+ * in order to detect if the mmu is destroyed.
+ */
+ ret = i915_gem_userptr_init__mmu_notifier(obj);
+ if (ret == 0)
+ ret = drm_gem_handle_create(file, &obj->base, &handle);
+
+ /* drop reference from allocate - handle holds it now */
+ i915_gem_object_put(obj);
+ if (ret)
+ return ret;
+
+ args->handle = handle;
+ return 0;
+#else
+ return -ENODEV;
+#endif
+}
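+
+/*
+ * A minimal userspace sketch of the ioctl documented above, assuming the
+ * uapi definitions from <drm/i915_drm.h> and a page-aligned buffer (e.g.
+ * from mmap() or posix_memalign()); names such as drm_fd and use_handle()
+ * are illustrative only:
+ *
+ *	struct drm_i915_gem_userptr arg = {
+ *		.user_ptr = (uintptr_t)ptr,	// page-aligned CPU pointer
+ *		.user_size = size,		// page-aligned length in bytes
+ *		.flags = I915_USERPTR_PROBE,	// optionally validate the range now
+ *	};
+ *
+ *	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_USERPTR, &arg) == 0)
+ *		use_handle(arg.handle);		// arg.handle names the new GEM object
+ */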
+
+int i915_gem_init_userptr(struct drm_i915_private *dev_priv)
+{
+#ifdef CONFIG_MMU_NOTIFIER
+ rwlock_init(&dev_priv->mm.notifier_lock);
+#endif
+
+ return 0;
+}
+
+void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv)
+{
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.h b/drivers/gpu/drm/i915/gem/i915_gem_userptr.h
new file mode 100644
index 0000000000..8dadb2f843
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2021 Intel Corporation
+ */
+
+#ifndef __I915_GEM_USERPTR_H__
+#define __I915_GEM_USERPTR_H__
+
+struct drm_i915_private;
+
+int i915_gem_init_userptr(struct drm_i915_private *dev_priv);
+void i915_gem_cleanup_userptr(struct drm_i915_private *dev_priv);
+
+#endif /* __I915_GEM_USERPTR_H__ */
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
new file mode 100644
index 0000000000..d4b918fb11
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c
@@ -0,0 +1,295 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/dma-fence-array.h>
+#include <linux/dma-fence-chain.h>
+#include <linux/jiffies.h>
+
+#include "gt/intel_engine.h"
+#include "gt/intel_rps.h"
+
+#include "i915_gem_ioctls.h"
+#include "i915_gem_object.h"
+
+static long
+i915_gem_object_wait_fence(struct dma_fence *fence,
+ unsigned int flags,
+ long timeout)
+{
+ BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
+
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ return timeout;
+
+ if (dma_fence_is_i915(fence))
+ return i915_request_wait_timeout(to_request(fence), flags, timeout);
+
+ return dma_fence_wait_timeout(fence,
+ flags & I915_WAIT_INTERRUPTIBLE,
+ timeout);
+}
+
+static void
+i915_gem_object_boost(struct dma_resv *resv, unsigned int flags)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+
+ /*
+ * Prescan all fences for potential boosting before we begin waiting.
+ *
+ * When we wait, we wait on outstanding fences serially. If the
+ * dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced
+ * form 1:2, then as we look at each wait in turn we see that each
+ * request is currently executing and not worthy of boosting. But if
+ * we only happen to look at the final fence in the sequence (because
+ * of request coalescing or splitting between read/write arrays by
+ * the iterator), then we would boost. As such our decision to boost
+ * or not is delicately balanced on the order we wait on fences.
+ *
+ * So instead of looking for boosts sequentially, look for all boosts
+ * upfront and then wait on the outstanding fences.
+ */
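+	/*
+	 * For example, with fences 1:1 and 1:2 outstanding (context 1,
+	 * seqnos 1 and 2), waiting serially never boosts: 1:1 is already
+	 * executing, and by the time we reach 1:2 it is executing too.
+	 * The upfront scan instead boosts any fence that has not yet
+	 * started.
+	 */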
+
+ dma_resv_iter_begin(&cursor, resv,
+ dma_resv_usage_rw(flags & I915_WAIT_ALL));
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ if (dma_fence_is_i915(fence) &&
+ !i915_request_started(to_request(fence)))
+ intel_rps_boost(to_request(fence));
+ dma_resv_iter_end(&cursor);
+}
+
+static long
+i915_gem_object_wait_reservation(struct dma_resv *resv,
+ unsigned int flags,
+ long timeout)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ long ret = timeout ?: 1;
+
+ i915_gem_object_boost(resv, flags);
+
+ dma_resv_iter_begin(&cursor, resv,
+ dma_resv_usage_rw(flags & I915_WAIT_ALL));
+ dma_resv_for_each_fence_unlocked(&cursor, fence) {
+ ret = i915_gem_object_wait_fence(fence, flags, timeout);
+ if (ret <= 0)
+ break;
+
+ if (timeout)
+ timeout = ret;
+ }
+ dma_resv_iter_end(&cursor);
+
+ return ret;
+}
+
+static void fence_set_priority(struct dma_fence *fence,
+ const struct i915_sched_attr *attr)
+{
+ struct i915_request *rq;
+ struct intel_engine_cs *engine;
+
+ if (dma_fence_is_signaled(fence) || !dma_fence_is_i915(fence))
+ return;
+
+ rq = to_request(fence);
+ engine = rq->engine;
+
+ rcu_read_lock(); /* RCU serialisation for set-wedged protection */
+ if (engine->sched_engine->schedule)
+ engine->sched_engine->schedule(rq, attr);
+ rcu_read_unlock();
+}
+
+static inline bool __dma_fence_is_chain(const struct dma_fence *fence)
+{
+ return fence->ops == &dma_fence_chain_ops;
+}
+
+void i915_gem_fence_wait_priority(struct dma_fence *fence,
+ const struct i915_sched_attr *attr)
+{
+ if (dma_fence_is_signaled(fence))
+ return;
+
+ local_bh_disable();
+
+ /* Recurse once into a fence-array */
+ if (dma_fence_is_array(fence)) {
+ struct dma_fence_array *array = to_dma_fence_array(fence);
+ int i;
+
+ for (i = 0; i < array->num_fences; i++)
+ fence_set_priority(array->fences[i], attr);
+ } else if (__dma_fence_is_chain(fence)) {
+ struct dma_fence *iter;
+
+ /* The chain is ordered; if we boost the last, we boost all */
+ dma_fence_chain_for_each(iter, fence) {
+ fence_set_priority(to_dma_fence_chain(iter)->fence,
+ attr);
+ break;
+ }
+ dma_fence_put(iter);
+ } else {
+ fence_set_priority(fence, attr);
+ }
+
+ local_bh_enable(); /* kick the tasklets if queues were reprioritised */
+}
+
+int
+i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
+ unsigned int flags,
+ const struct i915_sched_attr *attr)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+
+ dma_resv_iter_begin(&cursor, obj->base.resv,
+ dma_resv_usage_rw(flags & I915_WAIT_ALL));
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ i915_gem_fence_wait_priority(fence, attr);
+ dma_resv_iter_end(&cursor);
+ return 0;
+}
+
+/**
+ * i915_gem_object_wait - Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ */
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+ unsigned int flags,
+ long timeout)
+{
+ might_sleep();
+ GEM_BUG_ON(timeout < 0);
+
+ timeout = i915_gem_object_wait_reservation(obj->base.resv,
+ flags, timeout);
+
+ if (timeout < 0)
+ return timeout;
+
+ return !timeout ? -ETIME : 0;
+}
+
+static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
+{
+ /* nsecs_to_jiffies64() does not guard against overflow */
+ if ((NSEC_PER_SEC % HZ) != 0 &&
+ div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
+ return MAX_JIFFY_OFFSET;
+
+ return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1);
+}
+
+static unsigned long to_wait_timeout(s64 timeout_ns)
+{
+ if (timeout_ns < 0)
+ return MAX_SCHEDULE_TIMEOUT;
+
+ if (timeout_ns == 0)
+ return 0;
+
+ return nsecs_to_jiffies_timeout(timeout_ns);
+}
+
+/**
+ * i915_gem_wait_ioctl - implements DRM_IOCTL_I915_GEM_WAIT
+ * @dev: drm device pointer
+ * @data: ioctl data blob
+ * @file: drm file pointer
+ *
+ * Returns 0 if successful, else an error is returned with the remaining time in
+ * the timeout parameter.
+ * -ETIME: object is still busy after timeout
+ * -ERESTARTSYS: signal interrupted the wait
+ * -ENOENT: object doesn't exist
+ * Also possible, but rare:
+ * -EAGAIN: incomplete, restart syscall
+ * -ENOMEM: out of memory
+ * -ENODEV: Internal IRQ fail
+ * -E?: The add request failed
+ *
+ * The wait ioctl with a timeout of 0 reimplements the busy ioctl. With any
+ * non-zero timeout parameter the wait ioctl will wait for the given number of
+ * nanoseconds on an object becoming unbusy. Since the wait itself does so
+ * without holding struct_mutex, the object may become re-busied before this
+ * function completes. A similar but shorter race condition exists in the busy
+ * ioctl.
+ */
+int
+i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct drm_i915_gem_wait *args = data;
+ struct drm_i915_gem_object *obj;
+ ktime_t start;
+ long ret;
+
+ if (args->flags != 0)
+ return -EINVAL;
+
+ obj = i915_gem_object_lookup(file, args->bo_handle);
+ if (!obj)
+ return -ENOENT;
+
+ start = ktime_get();
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_PRIORITY |
+ I915_WAIT_ALL,
+ to_wait_timeout(args->timeout_ns));
+
+ if (args->timeout_ns > 0) {
+ args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+ if (args->timeout_ns < 0)
+ args->timeout_ns = 0;
+
+ /*
+ * Apparently ktime isn't accurate enough and occasionally has a
+ * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
+ * things up to make the test happy. We allow up to 1 jiffy.
+ *
+ * This is a regression from the timespec->ktime conversion.
+ */
+ if (ret == -ETIME && !nsecs_to_jiffies(args->timeout_ns))
+ args->timeout_ns = 0;
+
+		/* Asked to wait beyond the jiffy/scheduler precision? */
+ if (ret == -ETIME && args->timeout_ns)
+ ret = -EAGAIN;
+ }
+
+ i915_gem_object_put(obj);
+ return ret;
+}
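+
+/*
+ * A minimal userspace sketch of the wait ioctl above, assuming the uapi
+ * definitions from <drm/i915_drm.h>; handle and the 100ms timeout are
+ * illustrative:
+ *
+ *	struct drm_i915_gem_wait wait = {
+ *		.bo_handle = handle,
+ *		.flags = 0,				// must be zero
+ *		.timeout_ns = 100 * 1000 * 1000,	// wait up to 100ms
+ *	};
+ *
+ *	// Returns 0 when idle, or -1 with errno == ETIME if still busy;
+ *	// on return, wait.timeout_ns holds the remaining time.
+ *	ret = ioctl(drm_fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
+ */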
+
+/**
+ * i915_gem_object_wait_migration - Sync an accelerated migration operation
+ * @obj: The migrating object.
+ * @flags: waiting flags. Currently supports only I915_WAIT_INTERRUPTIBLE.
+ *
+ * Wait for any pending async migration operation on the object,
+ * whether it's explicitly (i915_gem_object_migrate()) or implicitly
+ * (swapin, initial clearing) initiated.
+ *
+ * Return: 0 if successful, -ERESTARTSYS if a signal was hit during waiting.
+ */
+int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj,
+ unsigned int flags)
+{
+ might_sleep();
+
+ return i915_gem_object_wait_moving_fence(obj, !!(flags & I915_WAIT_INTERRUPTIBLE));
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.c b/drivers/gpu/drm/i915/gem/i915_gemfs.c
new file mode 100644
index 0000000000..46b9a17d6a
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.c
@@ -0,0 +1,60 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+
+#include "i915_drv.h"
+#include "i915_gemfs.h"
+#include "i915_utils.h"
+
+void i915_gemfs_init(struct drm_i915_private *i915)
+{
+ char huge_opt[] = "huge=within_size"; /* r/w */
+ struct file_system_type *type;
+ struct vfsmount *gemfs;
+
+ /*
+ * By creating our own shmemfs mountpoint, we can pass in
+ * mount flags that better match our usecase.
+ *
+ * One example, although it is probably better with a per-file
+ * control, is selecting huge page allocations ("huge=within_size").
+ * However, we only do so on platforms which benefit from it, or to
+	 * offset the overhead of iommu lookups, where with the latter it is a
+	 * net win even on platforms which would otherwise see some performance
+	 * regressions, such as the slow reads issue on Broadwell and Skylake.
+ */
+
+ if (GRAPHICS_VER(i915) < 11 && !i915_vtd_active(i915))
+ return;
+
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ goto err;
+
+ type = get_fs_type("tmpfs");
+ if (!type)
+ goto err;
+
+ gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
+ if (IS_ERR(gemfs))
+ goto err;
+
+ i915->mm.gemfs = gemfs;
+ drm_info(&i915->drm, "Using Transparent Hugepages\n");
+ return;
+
+err:
+ drm_notice(&i915->drm,
+ "Transparent Hugepage support is recommended for optimal performance%s\n",
+ GRAPHICS_VER(i915) >= 11 ? " on this platform!" :
+ " when IOMMU is enabled!");
+}
+
+void i915_gemfs_fini(struct drm_i915_private *i915)
+{
+ kern_unmount(i915->mm.gemfs);
+}
diff --git a/drivers/gpu/drm/i915/gem/i915_gemfs.h b/drivers/gpu/drm/i915/gem/i915_gemfs.h
new file mode 100644
index 0000000000..5d835e44c4
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/i915_gemfs.h
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#ifndef __I915_GEMFS_H__
+#define __I915_GEMFS_H__
+
+struct drm_i915_private;
+
+void i915_gemfs_init(struct drm_i915_private *i915);
+void i915_gemfs_fini(struct drm_i915_private *i915);
+
+#endif
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
new file mode 100644
index 0000000000..bac9577550
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c
@@ -0,0 +1,131 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_scatterlist.h"
+
+#include "huge_gem_object.h"
+
+static void huge_free_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ unsigned long nreal = obj->scratch / PAGE_SIZE;
+ struct sgt_iter sgt_iter;
+ struct page *page;
+
+ for_each_sgt_page(page, sgt_iter, pages) {
+ __free_page(page);
+ if (!--nreal)
+ break;
+ }
+
+ sg_free_table(pages);
+ kfree(pages);
+}
+
+static int huge_get_pages(struct drm_i915_gem_object *obj)
+{
+#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL)
+ const unsigned long nreal = obj->scratch / PAGE_SIZE;
+ unsigned int npages; /* restricted by sg_alloc_table */
+ struct scatterlist *sg, *src, *end;
+ struct sg_table *pages;
+ unsigned long n;
+
+ if (overflows_type(obj->base.size / PAGE_SIZE, npages))
+ return -E2BIG;
+
+ npages = obj->base.size / PAGE_SIZE;
+ pages = kmalloc(sizeof(*pages), GFP);
+ if (!pages)
+ return -ENOMEM;
+
+ if (sg_alloc_table(pages, npages, GFP)) {
+ kfree(pages);
+ return -ENOMEM;
+ }
+
+ sg = pages->sgl;
+ for (n = 0; n < nreal; n++) {
+ struct page *page;
+
+ page = alloc_page(GFP | __GFP_HIGHMEM);
+ if (!page) {
+ sg_mark_end(sg);
+ goto err;
+ }
+
+ sg_set_page(sg, page, PAGE_SIZE, 0);
+ sg = __sg_next(sg);
+ }
+ if (nreal < npages) {
+ for (end = sg, src = pages->sgl; sg; sg = __sg_next(sg)) {
+ sg_set_page(sg, sg_page(src), PAGE_SIZE, 0);
+ src = __sg_next(src);
+ if (src == end)
+ src = pages->sgl;
+ }
+ }
+
+ if (i915_gem_gtt_prepare_pages(obj, pages))
+ goto err;
+
+ __i915_gem_object_set_pages(obj, pages);
+
+ return 0;
+
+err:
+ huge_free_pages(obj, pages);
+ return -ENOMEM;
+#undef GFP
+}
+
+static void huge_put_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ i915_gem_gtt_finish_pages(obj, pages);
+ huge_free_pages(obj, pages);
+
+ obj->mm.dirty = false;
+}
+
+static const struct drm_i915_gem_object_ops huge_ops = {
+ .name = "huge-gem",
+ .get_pages = huge_get_pages,
+ .put_pages = huge_put_pages,
+};
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+ phys_addr_t phys_size,
+ dma_addr_t dma_size)
+{
+ static struct lock_class_key lock_class;
+ struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
+
+ GEM_BUG_ON(!phys_size || phys_size > dma_size);
+ GEM_BUG_ON(!IS_ALIGNED(phys_size, PAGE_SIZE));
+ GEM_BUG_ON(!IS_ALIGNED(dma_size, I915_GTT_PAGE_SIZE));
+
+ if (overflows_type(dma_size, obj->base.size))
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, dma_size);
+ i915_gem_object_init(obj, &huge_ops, &lock_class, 0);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+ obj->scratch = phys_size;
+
+ return obj;
+}
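+
+/*
+ * Usage sketch (sizes illustrative): huge_gem_object(i915, SZ_64K, SZ_2M)
+ * creates an object that appears 2M large to the GTT while being backed by
+ * only 64K of real pages; the remaining sg entries cycle through (alias)
+ * those real pages.
+ */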
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
new file mode 100644
index 0000000000..b8cf31b7bf
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.h
@@ -0,0 +1,33 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __HUGE_GEM_OBJECT_H
+#define __HUGE_GEM_OBJECT_H
+
+#include <linux/types.h>
+
+#include "gem/i915_gem_object_types.h"
+
+struct drm_i915_private;
+
+struct drm_i915_gem_object *
+huge_gem_object(struct drm_i915_private *i915,
+ phys_addr_t phys_size,
+ dma_addr_t dma_size);
+
+static inline phys_addr_t
+huge_gem_object_phys_size(struct drm_i915_gem_object *obj)
+{
+ return obj->scratch;
+}
+
+static inline dma_addr_t
+huge_gem_object_dma_size(struct drm_i915_gem_object *obj)
+{
+ return obj->base.size;
+}
+
+#endif /* !__HUGE_GEM_OBJECT_H */
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
new file mode 100644
index 0000000000..6b9f6cf50b
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -0,0 +1,2034 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+#include <linux/string_helpers.h>
+#include <linux/swap.h>
+
+#include "i915_selftest.h"
+
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_pm.h"
+#include "gem/i915_gem_region.h"
+
+#include "gt/intel_gt.h"
+
+#include "igt_gem_utils.h"
+#include "mock_context.h"
+
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
+#include "selftests/mock_region.h"
+#include "selftests/i915_random.h"
+
+static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915,
+ struct file *file)
+{
+ struct i915_gem_context *ctx = live_context(i915, file);
+ struct i915_address_space *vm;
+
+ if (IS_ERR(ctx))
+ return ctx;
+
+ vm = ctx->vm;
+ if (vm)
+ WRITE_ONCE(vm->scrub_64K, true);
+
+ return ctx;
+}
+
+static const unsigned int page_sizes[] = {
+ I915_GTT_PAGE_SIZE_2M,
+ I915_GTT_PAGE_SIZE_64K,
+ I915_GTT_PAGE_SIZE_4K,
+};
+
+static unsigned int get_largest_page_size(struct drm_i915_private *i915,
+ u64 rem)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
+ unsigned int page_size = page_sizes[i];
+
+ if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
+ return page_size;
+ }
+
+ return 0;
+}
+
+static void huge_pages_free_pages(struct sg_table *st)
+{
+ struct scatterlist *sg;
+
+ for (sg = st->sgl; sg; sg = __sg_next(sg)) {
+ if (sg_page(sg))
+ __free_pages(sg_page(sg), get_order(sg->length));
+ }
+
+ sg_free_table(st);
+ kfree(st);
+}
+
+static int get_huge_pages(struct drm_i915_gem_object *obj)
+{
+#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
+ unsigned int page_mask = obj->mm.page_mask;
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int sg_page_sizes;
+ u64 rem;
+
+ /* restricted by sg_alloc_table */
+ if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int))
+ return -E2BIG;
+
+ st = kmalloc(sizeof(*st), GFP);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ rem = obj->base.size;
+ sg = st->sgl;
+ st->nents = 0;
+ sg_page_sizes = 0;
+
+ /*
+ * Our goal here is simple, we want to greedily fill the object from
+ * largest to smallest page-size, while ensuring that we use *every*
+ * page-size as per the given page-mask.
+ */
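+	/*
+	 * Worked example: a 2M + 64K + 4K object with a page_mask of
+	 * 2M | 64K | 4K is filled with exactly one 2M chunk, then one
+	 * 64K chunk, then one 4K chunk.
+	 */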
+ do {
+ unsigned int bit = ilog2(page_mask);
+ unsigned int page_size = BIT(bit);
+ int order = get_order(page_size);
+
+ do {
+ struct page *page;
+
+ GEM_BUG_ON(order > MAX_ORDER);
+ page = alloc_pages(GFP | __GFP_ZERO, order);
+ if (!page)
+ goto err;
+
+ sg_set_page(sg, page, page_size, 0);
+ sg_page_sizes |= page_size;
+ st->nents++;
+
+ rem -= page_size;
+ if (!rem) {
+ sg_mark_end(sg);
+ break;
+ }
+
+ sg = __sg_next(sg);
+ } while ((rem - ((page_size-1) & page_mask)) >= page_size);
+
+ page_mask &= (page_size-1);
+ } while (page_mask);
+
+ if (i915_gem_gtt_prepare_pages(obj, st))
+ goto err;
+
+ GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
+ __i915_gem_object_set_pages(obj, st);
+
+ return 0;
+
+err:
+ sg_set_page(sg, NULL, 0, 0);
+ sg_mark_end(sg);
+ huge_pages_free_pages(st);
+
+ return -ENOMEM;
+}
+
+static void put_huge_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ i915_gem_gtt_finish_pages(obj, pages);
+ huge_pages_free_pages(pages);
+
+ obj->mm.dirty = false;
+
+ __start_cpu_write(obj);
+}
+
+static const struct drm_i915_gem_object_ops huge_page_ops = {
+ .name = "huge-gem",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+ .get_pages = get_huge_pages,
+ .put_pages = put_huge_pages,
+};
+
+static struct drm_i915_gem_object *
+huge_pages_object(struct drm_i915_private *i915,
+ u64 size,
+ unsigned int page_mask)
+{
+ static struct lock_class_key lock_class;
+ struct drm_i915_gem_object *obj;
+ unsigned int cache_level;
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));
+
+ if (size >> PAGE_SHIFT > INT_MAX)
+ return ERR_PTR(-E2BIG);
+
+ if (overflows_type(size, obj->base.size))
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+ i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0);
+ obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
+ i915_gem_object_set_volatile(obj);
+
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+
+ cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
+ i915_gem_object_set_cache_coherency(obj, cache_level);
+
+ obj->mm.page_mask = page_mask;
+
+ return obj;
+}
+
+static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ const u64 max_len = rounddown_pow_of_two(UINT_MAX);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ u64 rem;
+
+ /* restricted by sg_alloc_table */
+ if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int))
+ return -E2BIG;
+
+ st = kmalloc(sizeof(*st), GFP);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ /* Use optimal page sized chunks to fill in the sg table */
+ rem = obj->base.size;
+ sg = st->sgl;
+ st->nents = 0;
+ do {
+ unsigned int page_size = get_largest_page_size(i915, rem);
+ unsigned int len = min(page_size * div_u64(rem, page_size),
+ max_len);
+
+ GEM_BUG_ON(!page_size);
+
+ sg->offset = 0;
+ sg->length = len;
+ sg_dma_len(sg) = len;
+ sg_dma_address(sg) = page_size;
+
+ st->nents++;
+
+ rem -= len;
+ if (!rem) {
+ sg_mark_end(sg);
+ break;
+ }
+
+ sg = sg_next(sg);
+ } while (1);
+
+ i915_sg_trim(st);
+
+ __i915_gem_object_set_pages(obj, st);
+
+ return 0;
+}
+
+static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ unsigned int page_size;
+
+ st = kmalloc(sizeof(*st), GFP);
+ if (!st)
+ return -ENOMEM;
+
+ if (sg_alloc_table(st, 1, GFP)) {
+ kfree(st);
+ return -ENOMEM;
+ }
+
+ sg = st->sgl;
+ st->nents = 1;
+
+ page_size = get_largest_page_size(i915, obj->base.size);
+ GEM_BUG_ON(!page_size);
+
+ sg->offset = 0;
+ sg->length = obj->base.size;
+ sg_dma_len(sg) = obj->base.size;
+ sg_dma_address(sg) = page_size;
+
+ __i915_gem_object_set_pages(obj, st);
+
+ return 0;
+#undef GFP
+}
+
+static void fake_free_huge_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ sg_free_table(pages);
+ kfree(pages);
+}
+
+static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ fake_free_huge_pages(obj, pages);
+ obj->mm.dirty = false;
+}
+
+static const struct drm_i915_gem_object_ops fake_ops = {
+ .name = "fake-gem",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+ .get_pages = fake_get_huge_pages,
+ .put_pages = fake_put_huge_pages,
+};
+
+static const struct drm_i915_gem_object_ops fake_ops_single = {
+ .name = "fake-gem",
+ .flags = I915_GEM_OBJECT_IS_SHRINKABLE,
+ .get_pages = fake_get_huge_pages_single,
+ .put_pages = fake_put_huge_pages,
+};
+
+static struct drm_i915_gem_object *
+fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
+{
+ static struct lock_class_key lock_class;
+ struct drm_i915_gem_object *obj;
+
+ GEM_BUG_ON(!size);
+ GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
+
+ if (size >> PAGE_SHIFT > UINT_MAX)
+ return ERR_PTR(-E2BIG);
+
+ if (overflows_type(size, obj->base.size))
+ return ERR_PTR(-E2BIG);
+
+ obj = i915_gem_object_alloc();
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ drm_gem_private_object_init(&i915->drm, &obj->base, size);
+
+ if (single)
+ i915_gem_object_init(obj, &fake_ops_single, &lock_class, 0);
+ else
+ i915_gem_object_init(obj, &fake_ops, &lock_class, 0);
+
+ i915_gem_object_set_volatile(obj);
+
+ obj->write_domain = I915_GEM_DOMAIN_CPU;
+ obj->read_domains = I915_GEM_DOMAIN_CPU;
+ obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE);
+
+ return obj;
+}
+
+static int igt_check_page_sizes(struct i915_vma *vma)
+{
+ struct drm_i915_private *i915 = vma->vm->i915;
+ unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
+ struct drm_i915_gem_object *obj = vma->obj;
+ int err;
+
+ /* We have to wait for the async bind to complete before our asserts */
+ err = i915_vma_sync(vma);
+ if (err)
+ return err;
+
+ if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
+ pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
+ vma->page_sizes.sg & ~supported, supported);
+ err = -EINVAL;
+ }
+
+ if (!HAS_PAGE_SIZES(i915, vma->resource->page_sizes_gtt)) {
+ pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
+ vma->resource->page_sizes_gtt & ~supported, supported);
+ err = -EINVAL;
+ }
+
+ if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
+ pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
+ vma->page_sizes.phys, obj->mm.page_sizes.phys);
+ err = -EINVAL;
+ }
+
+ if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
+ pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
+ vma->page_sizes.sg, obj->mm.page_sizes.sg);
+ err = -EINVAL;
+ }
+
+ /*
+	 * The dma-api is like a box of chocolates when it comes to the
+	 * alignment of dma addresses; however, for LMEM we have total control
+	 * and so can guarantee alignment. Likewise, when we allocate our blocks
+	 * they should appear in descending order, and if we know that we align
+	 * to the largest page size for the GTT address, we should be able to
+	 * assert that if we see 2M physical pages then we should also get 2M
+	 * GTT pages. If we don't, then something might be wrong in our
+	 * construction of the backing pages.
+	 *
+	 * Maintaining alignment is required to utilise huge pages in the ppGTT.
+ */
+ if (i915_gem_object_is_lmem(obj) &&
+ IS_ALIGNED(i915_vma_offset(vma), SZ_2M) &&
+ vma->page_sizes.sg & SZ_2M &&
+ vma->resource->page_sizes_gtt < SZ_2M) {
+ pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
+ vma->page_sizes.sg, vma->resource->page_sizes_gtt);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int igt_mock_exhaust_device_supported_pages(void *arg)
+{
+ struct i915_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->vm.i915;
+ unsigned int saved_mask = RUNTIME_INFO(i915)->page_sizes;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int i, j, single;
+ int err;
+
+ /*
+ * Sanity check creating objects with every valid page support
+ * combination for our mock device.
+ */
+
+ for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
+ unsigned int combination = SZ_4K; /* Required for ppGTT */
+
+ for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
+ if (i & BIT(j))
+ combination |= page_sizes[j];
+ }
+
+ RUNTIME_INFO(i915)->page_sizes = combination;
+
+ for (single = 0; single <= 1; ++single) {
+ obj = fake_huge_pages_object(i915, combination, !!single);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_device;
+ }
+
+ if (obj->base.size != combination) {
+ pr_err("obj->base.size=%zu, expected=%u\n",
+ obj->base.size, combination);
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_put;
+
+ err = igt_check_page_sizes(vma);
+
+ if (vma->page_sizes.sg != combination) {
+ pr_err("page_sizes.sg=%u, expected=%u\n",
+ vma->page_sizes.sg, combination);
+ err = -EINVAL;
+ }
+
+ i915_vma_unpin(vma);
+ i915_gem_object_put(obj);
+
+ if (err)
+ goto out_device;
+ }
+ }
+
+ goto out_device;
+
+out_put:
+ i915_gem_object_put(obj);
+out_device:
+ RUNTIME_INFO(i915)->page_sizes = saved_mask;
+
+ return err;
+}
+
+static int igt_mock_memory_region_huge_pages(void *arg)
+{
+ const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS };
+ struct i915_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->vm.i915;
+ unsigned long supported = RUNTIME_INFO(i915)->page_sizes;
+ struct intel_memory_region *mem;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int bit;
+ int err = 0;
+
+ mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
+ if (IS_ERR(mem)) {
+ pr_err("%s failed to create memory region\n", __func__);
+ return PTR_ERR(mem);
+ }
+
+ for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+ unsigned int page_size = BIT(bit);
+ resource_size_t phys;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(flags); ++i) {
+ obj = i915_gem_object_create_region(mem,
+ page_size, page_size,
+ flags[i]);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_region;
+ }
+
+ vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_put;
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_unpin;
+
+ phys = i915_gem_object_get_dma_address(obj, 0);
+ if (!IS_ALIGNED(phys, page_size)) {
+ pr_err("%s addr misaligned(%pa) page_size=%u\n",
+ __func__, &phys, page_size);
+ err = -EINVAL;
+ goto out_unpin;
+ }
+
+ if (vma->resource->page_sizes_gtt != page_size) {
+ pr_err("%s page_sizes.gtt=%u, expected=%u\n",
+ __func__, vma->resource->page_sizes_gtt,
+ page_size);
+ err = -EINVAL;
+ goto out_unpin;
+ }
+
+ i915_vma_unpin(vma);
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_put(obj);
+ }
+ }
+
+ goto out_region;
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_put:
+ i915_gem_object_put(obj);
+out_region:
+ intel_memory_region_destroy(mem);
+ return err;
+}
+
+static int igt_mock_ppgtt_misaligned_dma(void *arg)
+{
+ struct i915_ppgtt *ppgtt = arg;
+ struct drm_i915_private *i915 = ppgtt->vm.i915;
+ unsigned long supported = RUNTIME_INFO(i915)->page_sizes;
+ struct drm_i915_gem_object *obj;
+ int bit;
+ int err;
+
+ /*
+ * Sanity check dma misalignment for huge pages -- the dma addresses we
+ * insert into the paging structures need to always respect the page
+ * size alignment.
+ */
+
+ bit = ilog2(I915_GTT_PAGE_SIZE_64K);
+
+ for_each_set_bit_from(bit, &supported,
+ ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
+ IGT_TIMEOUT(end_time);
+ unsigned int page_size = BIT(bit);
+ unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+ unsigned int offset;
+ unsigned int size =
+ round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1;
+ struct i915_vma *vma;
+
+ obj = fake_huge_pages_object(i915, size, true);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (obj->base.size != size) {
+ pr_err("obj->base.size=%zu, expected=%u\n",
+ obj->base.size, size);
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err)
+ goto out_put;
+
+ /* Force the page size for this object */
+ obj->mm.page_sizes.sg = page_size;
+
+ vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_unpin;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_unpin;
+
+
+ err = igt_check_page_sizes(vma);
+
+ if (vma->resource->page_sizes_gtt != page_size) {
+ pr_err("page_sizes.gtt=%u, expected %u\n",
+ vma->resource->page_sizes_gtt, page_size);
+ err = -EINVAL;
+ }
+
+ i915_vma_unpin(vma);
+
+ if (err)
+ goto out_unpin;
+
+ /*
+ * Try all the other valid offsets until the next
+ * boundary -- should always fall back to using 4K
+ * pages.
+ */
+ for (offset = 4096; offset < page_size; offset += 4096) {
+ err = i915_vma_unbind_unlocked(vma);
+ if (err)
+ goto out_unpin;
+
+ err = i915_vma_pin(vma, 0, 0, flags | offset);
+ if (err)
+ goto out_unpin;
+
+ err = igt_check_page_sizes(vma);
+
+ if (vma->resource->page_sizes_gtt != I915_GTT_PAGE_SIZE_4K) {
+ pr_err("page_sizes.gtt=%u, expected %llu\n",
+ vma->resource->page_sizes_gtt,
+ I915_GTT_PAGE_SIZE_4K);
+ err = -EINVAL;
+ }
+
+ i915_vma_unpin(vma);
+
+ if (err)
+ goto out_unpin;
+
+ if (igt_timeout(end_time,
+ "%s timed out at offset %x with page-size %x\n",
+ __func__, offset, page_size))
+ break;
+ }
+
+ i915_gem_object_lock(obj, NULL);
+ i915_gem_object_unpin_pages(obj);
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_unlock(obj);
+ i915_gem_object_put(obj);
+ }
+
+ return 0;
+
+out_unpin:
+ i915_gem_object_lock(obj, NULL);
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_unlock(obj);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static void close_object_list(struct list_head *objects)
+{
+ struct drm_i915_gem_object *obj, *on;
+
+ list_for_each_entry_safe(obj, on, objects, st_link) {
+ list_del(&obj->st_link);
+ i915_gem_object_lock(obj, NULL);
+ i915_gem_object_unpin_pages(obj);
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_unlock(obj);
+ i915_gem_object_put(obj);
+ }
+}
+
+static int igt_ppgtt_huge_fill(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
+ bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50);
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ unsigned long max_pages;
+ unsigned long page_num;
+ struct file *file;
+ bool single = false;
+ LIST_HEAD(objects);
+ IGT_TIMEOUT(end_time);
+ int err = -ENODEV;
+
+ if (supported == I915_GTT_PAGE_SIZE_4K)
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+ max_pages = vm->total >> PAGE_SHIFT;
+
+ for_each_prime_number_from(page_num, 1, max_pages) {
+ struct drm_i915_gem_object *obj;
+ u64 size = page_num << PAGE_SHIFT;
+ struct i915_vma *vma;
+ unsigned int expected_gtt = 0;
+ int i;
+
+ obj = fake_huge_pages_object(i915, size, single);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ break;
+ }
+
+ if (obj->base.size != size) {
+ pr_err("obj->base.size=%zd, expected=%llu\n",
+ obj->base.size, size);
+ i915_gem_object_put(obj);
+ err = -EINVAL;
+ break;
+ }
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ i915_gem_object_put(obj);
+ break;
+ }
+
+ list_add(&obj->st_link, &objects);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ break;
+ }
+
+ /* vma start must be aligned to BIT(21) to allow 2M PTEs */
+ err = i915_vma_pin(vma, 0, BIT(21), PIN_USER);
+ if (err)
+ break;
+
+ err = igt_check_page_sizes(vma);
+ if (err) {
+ i915_vma_unpin(vma);
+ break;
+ }
+
+ /*
+ * Figure out the expected gtt page size knowing that we go from
+ * largest to smallest page size sg chunks, and that we align to
+ * the largest page size.
+ */
+ for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
+ unsigned int page_size = page_sizes[i];
+
+ if (HAS_PAGE_SIZES(i915, page_size) &&
+ size >= page_size) {
+ expected_gtt |= page_size;
+ size &= page_size-1;
+ }
+ }
+
+ GEM_BUG_ON(!expected_gtt);
+ GEM_BUG_ON(size);
+
+ if (!has_pte64 && (obj->base.size < I915_GTT_PAGE_SIZE_2M ||
+ expected_gtt & I915_GTT_PAGE_SIZE_2M))
+ expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;
+
+ i915_vma_unpin(vma);
+
+ if (!has_pte64 && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+ if (!IS_ALIGNED(vma->node.start,
+ I915_GTT_PAGE_SIZE_2M)) {
+ pr_err("node.start(%llx) not aligned to 2M\n",
+ vma->node.start);
+ err = -EINVAL;
+ break;
+ }
+
+ if (!IS_ALIGNED(vma->node.size,
+ I915_GTT_PAGE_SIZE_2M)) {
+ pr_err("node.size(%llx) not aligned to 2M\n",
+ vma->node.size);
+ err = -EINVAL;
+ break;
+ }
+ }
+
+ if (vma->resource->page_sizes_gtt != expected_gtt) {
+ pr_err("gtt=%#x, expected=%#x, size=0x%zx, single=%s\n",
+ vma->resource->page_sizes_gtt, expected_gtt,
+ obj->base.size, str_yes_no(!!single));
+ err = -EINVAL;
+ break;
+ }
+
+ if (igt_timeout(end_time,
+ "%s timed out at size %zd\n",
+ __func__, obj->base.size))
+ break;
+
+ single = !single;
+ }
+
+ close_object_list(&objects);
+
+ if (err == -ENOMEM || err == -ENOSPC)
+ err = 0;
+
+ i915_vm_put(vm);
+out:
+ fput(file);
+ return err;
+}
+
+static int igt_ppgtt_64K(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50);
+ struct drm_i915_gem_object *obj;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ struct file *file;
+ const struct object_info {
+ unsigned int size;
+ unsigned int gtt;
+ unsigned int offset;
+ } objects[] = {
+ /* Cases with forced padding/alignment */
+ {
+ .size = SZ_64K,
+ .gtt = I915_GTT_PAGE_SIZE_64K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_64K + SZ_4K,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_64K - SZ_4K,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M,
+ .gtt = I915_GTT_PAGE_SIZE_64K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M - SZ_4K,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M + SZ_4K,
+ .gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M + SZ_64K,
+ .gtt = I915_GTT_PAGE_SIZE_64K,
+ .offset = 0,
+ },
+ {
+ .size = SZ_2M - SZ_64K,
+ .gtt = I915_GTT_PAGE_SIZE_64K,
+ .offset = 0,
+ },
+ /* Try without any forced padding/alignment */
+ {
+ .size = SZ_64K,
+ .offset = SZ_2M,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ },
+ {
+ .size = SZ_128K,
+ .offset = SZ_2M - SZ_64K,
+ .gtt = I915_GTT_PAGE_SIZE_4K,
+ },
+ };
+ struct i915_vma *vma;
+ int i, single;
+ int err;
+
+ /*
+ * Sanity check some of the trickiness with 64K pages -- either we can
+	 * safely mark the whole page-table (2M block) as 64K, or we have to
+	 * always fall back to 4K.
+ */
+
+ if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+
+ for (i = 0; i < ARRAY_SIZE(objects); ++i) {
+ unsigned int size = objects[i].size;
+ unsigned int expected_gtt = objects[i].gtt;
+ unsigned int offset = objects[i].offset;
+ unsigned int flags = PIN_USER;
+
+ /*
+ * For modern GTT models, the requirements for marking a page-table
+ * as 64K have been relaxed. Account for this.
+ */
+ if (has_pte64) {
+ expected_gtt = 0;
+ if (size >= SZ_64K)
+ expected_gtt |= I915_GTT_PAGE_SIZE_64K;
+ if (size & (SZ_64K - 1))
+ expected_gtt |= I915_GTT_PAGE_SIZE_4K;
+ }
+
+ for (single = 0; single <= 1; single++) {
+ obj = fake_huge_pages_object(i915, size, !!single);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_vm;
+ }
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err)
+ goto out_object_put;
+
+ /*
+ * Disable 2M pages -- We only want to use 64K/4K pages
+ * for this test.
+ */
+ obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_object_unpin;
+ }
+
+ if (offset)
+ flags |= PIN_OFFSET_FIXED | offset;
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_object_unpin;
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_vma_unpin;
+
+ if (!has_pte64 && !offset &&
+ vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
+ if (!IS_ALIGNED(vma->node.start,
+ I915_GTT_PAGE_SIZE_2M)) {
+ pr_err("node.start(%llx) not aligned to 2M\n",
+ vma->node.start);
+ err = -EINVAL;
+ goto out_vma_unpin;
+ }
+
+ if (!IS_ALIGNED(vma->node.size,
+ I915_GTT_PAGE_SIZE_2M)) {
+ pr_err("node.size(%llx) not aligned to 2M\n",
+ vma->node.size);
+ err = -EINVAL;
+ goto out_vma_unpin;
+ }
+ }
+
+ if (vma->resource->page_sizes_gtt != expected_gtt) {
+ pr_err("gtt=%#x, expected=%#x, i=%d, single=%s offset=%#x size=%#x\n",
+ vma->resource->page_sizes_gtt,
+ expected_gtt, i, str_yes_no(!!single),
+ offset, size);
+ err = -EINVAL;
+ goto out_vma_unpin;
+ }
+
+ i915_vma_unpin(vma);
+ i915_gem_object_lock(obj, NULL);
+ i915_gem_object_unpin_pages(obj);
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_unlock(obj);
+ i915_gem_object_put(obj);
+
+ i915_gem_drain_freed_objects(i915);
+ }
+ }
+
+ goto out_vm;
+
+out_vma_unpin:
+ i915_vma_unpin(vma);
+out_object_unpin:
+ i915_gem_object_lock(obj, NULL);
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_unlock(obj);
+out_object_put:
+ i915_gem_object_put(obj);
+out_vm:
+ i915_vm_put(vm);
+out:
+ fput(file);
+ return err;
+}
+
+static int gpu_write(struct intel_context *ce,
+ struct i915_vma *vma,
+ u32 dw,
+ u32 val)
+{
+ int err;
+
+ i915_gem_object_lock(vma->obj, NULL);
+ err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
+ i915_gem_object_unlock(vma->obj);
+ if (err)
+ return err;
+
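+	/* Write 'val' into dword 'dw' of every page covered by the vma */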
+ return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32),
+ vma->size >> PAGE_SHIFT, val);
+}
+
+static int
+__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+ unsigned int needs_flush;
+ unsigned long n;
+ int err;
+
+ i915_gem_object_lock(obj, NULL);
+ err = i915_gem_object_prepare_read(obj, &needs_flush);
+ if (err)
+ goto err_unlock;
+
+ for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
+ u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
+
+ if (needs_flush & CLFLUSH_BEFORE)
+ drm_clflush_virt_range(ptr, PAGE_SIZE);
+
+ if (ptr[dword] != val) {
+ pr_err("n=%lu ptr[%u]=%u, val=%u\n",
+ n, dword, ptr[dword], val);
+ kunmap_atomic(ptr);
+ err = -EINVAL;
+ break;
+ }
+
+ kunmap_atomic(ptr);
+ }
+
+ i915_gem_object_finish_access(obj);
+err_unlock:
+ i915_gem_object_unlock(obj);
+
+ return err;
+}
+
+static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+ unsigned long n = obj->base.size >> PAGE_SHIFT;
+ u32 *ptr;
+ int err;
+
+ err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ return err;
+
+ ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+
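+	/* Check the same dword in every page of the WC mapping */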
+ ptr += dword;
+ while (n--) {
+ if (*ptr != val) {
+ pr_err("base[%u]=%08x, val=%08x\n",
+ dword, *ptr, val);
+ err = -EINVAL;
+ break;
+ }
+
+ ptr += PAGE_SIZE / sizeof(*ptr);
+ }
+
+ i915_gem_object_unpin_map(obj);
+ return err;
+}
+
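+/* Use kmap for shmem-backed objects, a WC vmap for everything else */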
+static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
+{
+ if (i915_gem_object_has_struct_page(obj))
+ return __cpu_check_shmem(obj, dword, val);
+ else
+ return __cpu_check_vmap(obj, dword, val);
+}
+
+static int __igt_write_huge(struct intel_context *ce,
+ struct drm_i915_gem_object *obj,
+ u64 size, u64 offset,
+ u32 dword, u32 val)
+{
+ unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
+ struct i915_vma *vma;
+ int err;
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, size, 0, flags | offset);
+ if (err) {
+ /*
+		 * The ggtt may have some pages reserved, so
+		 * refrain from erroring out.
+ */
+ if (err == -ENOSPC && i915_is_ggtt(ce->vm))
+ err = 0;
+
+ return err;
+ }
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_vma_unpin;
+
+ err = gpu_write(ce, vma, dword, val);
+ if (err) {
+ pr_err("gpu-write failed at offset=%llx\n", offset);
+ goto out_vma_unpin;
+ }
+
+ err = cpu_check(obj, dword, val);
+ if (err) {
+ pr_err("cpu-check failed at offset=%llx\n", offset);
+ goto out_vma_unpin;
+ }
+
+out_vma_unpin:
+ i915_vma_unpin(vma);
+ return err;
+}
+
+static int igt_write_huge(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj)
+{
+ struct i915_gem_engines *engines;
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+ I915_RND_STATE(prng);
+ IGT_TIMEOUT(end_time);
+ unsigned int max_page_size;
+ unsigned int count;
+ struct i915_gem_context *ctx;
+ struct file *file;
+ u64 max;
+ u64 num;
+ u64 size;
+ int *order;
+ int i, n;
+ int err = 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+
+ size = obj->base.size;
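+	/*
+	 * Without the relaxed 64K rules, a vma using 64K GTT pages must own
+	 * its whole 2M page-table, so pad the size we pin up to a 2M
+	 * multiple (see the offset rounding below).
+	 */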
+ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+ !HAS_64K_PAGES(i915))
+ size = round_up(size, I915_GTT_PAGE_SIZE_2M);
+
+ n = 0;
+ count = 0;
+ max = U64_MAX;
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ count++;
+ if (!intel_engine_can_store_dword(ce->engine))
+ continue;
+
+ max = min(max, ce->vm->total);
+ n++;
+ }
+ i915_gem_context_unlock_engines(ctx);
+ if (!n)
+ goto out;
+
+ /*
+ * To keep things interesting when alternating between engines in our
+	 * randomized order, let's also make feeding to the same engine a few
+ * times in succession a possibility by enlarging the permutation array.
+ */
+ order = i915_random_order(count * count, &prng);
+ if (!order) {
+ err = -ENOMEM;
+ goto out;
+ }
+
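+	/* Step the remaining address space in units of the largest page size */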
+ max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
+ max = div_u64(max - size, max_page_size);
+
+ /*
+ * Try various offsets in an ascending/descending fashion until we
+	 * time out -- we want to avoid issues hidden by effectively always
+	 * using offset = 0.
+ */
+ i = 0;
+ engines = i915_gem_context_lock_engines(ctx);
+ for_each_prime_number_from(num, 0, max) {
+ u64 offset_low = num * max_page_size;
+ u64 offset_high = (max - num) * max_page_size;
+ u32 dword = offset_in_page(num) / 4;
+ struct intel_context *ce;
+
+ ce = engines->engines[order[i] % engines->num_engines];
+ i = (i + 1) % (count * count);
+ if (!ce || !intel_engine_can_store_dword(ce->engine))
+ continue;
+
+ /*
+ * In order to utilize 64K pages we need to both pad the vma
+ * size and ensure the vma offset is at the start of the pt
+		 * boundary; however, to improve coverage we opt for testing
+		 * both aligned and unaligned offsets.
+ *
+ * With PS64 this is no longer the case, but to ensure we
+ * sometimes get the compact layout for smaller objects, apply
+ * the round_up anyway.
+ */
+ if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+ offset_low = round_down(offset_low,
+ I915_GTT_PAGE_SIZE_2M);
+
+ err = __igt_write_huge(ce, obj, size, offset_low,
+ dword, num + 1);
+ if (err)
+ break;
+
+ err = __igt_write_huge(ce, obj, size, offset_high,
+ dword, num + 1);
+ if (err)
+ break;
+
+ if (igt_timeout(end_time,
+ "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
+ __func__, ce->engine->name, offset_low, offset_high,
+ max_page_size))
+ break;
+ }
+ i915_gem_context_unlock_engines(ctx);
+
+ kfree(order);
+
+out:
+ fput(file);
+ return err;
+}
+
+typedef struct drm_i915_gem_object *
+(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags);
+
+static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
+{
+ return i915->mm.gemfs && has_transparent_hugepage();
+}
+
+static struct drm_i915_gem_object *
+igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+ if (!igt_can_allocate_thp(i915)) {
+ pr_info("%s missing THP support, skipping\n", __func__);
+ return ERR_PTR(-ENODEV);
+ }
+
+ return i915_gem_object_create_shmem(i915, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+ return i915_gem_object_create_internal(i915, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+ return huge_pages_object(i915, size, size);
+}
+
+static struct drm_i915_gem_object *
+igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags)
+{
+ return i915_gem_object_create_lmem(i915, size, flags);
+}
+
+static u32 igt_random_size(struct rnd_state *prng,
+ u32 min_page_size,
+ u32 max_page_size)
+{
+ u64 mask;
+ u32 size;
+
+ GEM_BUG_ON(!is_power_of_2(min_page_size));
+ GEM_BUG_ON(!is_power_of_2(max_page_size));
+ GEM_BUG_ON(min_page_size < PAGE_SIZE);
+ GEM_BUG_ON(min_page_size > max_page_size);
+
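+	/*
+	 * Pick a page-aligned size below 2 * max_page_size, then raise it to
+	 * at least min_page_size by OR-ing in that bit if needed.
+	 */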
+ mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK;
+ size = prandom_u32_state(prng) & mask;
+ if (size < min_page_size)
+ size |= min_page_size;
+
+ return size;
+}
+
+static int igt_ppgtt_smoke_huge(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ I915_RND_STATE(prng);
+ struct {
+ igt_create_fn fn;
+ u32 min;
+ u32 max;
+ } backends[] = {
+ { igt_create_internal, SZ_64K, SZ_2M, },
+ { igt_create_shmem, SZ_64K, SZ_32M, },
+ { igt_create_local, SZ_64K, SZ_1G, },
+ };
+ int err;
+ int i;
+
+ /*
+ * Sanity check that the HW uses huge pages correctly through our
+ * various backends -- ensure that our writes land in the right place.
+ */
+
+ for (i = 0; i < ARRAY_SIZE(backends); ++i) {
+ u32 min = backends[i].min;
+ u32 max = backends[i].max;
+ u32 size = max;
+
+try_again:
+ size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
+
+ obj = backends[i].fn(i915, size, 0);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ if (err == -E2BIG) {
+ size >>= 1;
+ goto try_again;
+ } else if (err == -ENODEV) {
+ err = 0;
+ continue;
+ }
+
+ return err;
+ }
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
+ i915_gem_object_put(obj);
+ size >>= 1;
+ goto try_again;
+ }
+ goto out_put;
+ }
+
+ if (obj->mm.page_sizes.phys < min) {
+ pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n",
+ __func__, size, i);
+ err = -ENOMEM;
+ goto out_unpin;
+ }
+
+ err = igt_write_huge(i915, obj);
+ if (err) {
+ pr_err("%s write-huge failed with size=%u, i=%d\n",
+ __func__, size, i);
+ }
+out_unpin:
+ i915_gem_object_lock(obj, NULL);
+ i915_gem_object_unpin_pages(obj);
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_unlock(obj);
+out_put:
+ i915_gem_object_put(obj);
+
+ if (err == -ENOMEM || err == -ENXIO)
+ err = 0;
+
+ if (err)
+ break;
+
+ cond_resched();
+ }
+
+ return err;
+}
+
+static int igt_ppgtt_sanity_check(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
+ struct {
+ igt_create_fn fn;
+ unsigned int flags;
+ } backends[] = {
+ { igt_create_system, 0, },
+ { igt_create_local, 0, },
+ { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, },
+ };
+ struct {
+ u32 size;
+ u32 pages;
+ } combos[] = {
+ { SZ_64K, SZ_64K },
+ { SZ_2M, SZ_2M },
+ { SZ_2M, SZ_64K },
+ { SZ_2M - SZ_64K, SZ_64K },
+ { SZ_2M - SZ_4K, SZ_64K | SZ_4K },
+ { SZ_2M + SZ_4K, SZ_64K | SZ_4K },
+ { SZ_2M + SZ_4K, SZ_2M | SZ_4K },
+ { SZ_2M + SZ_64K, SZ_2M | SZ_64K },
+ { SZ_2M + SZ_64K, SZ_64K },
+ };
+ int i, j;
+ int err;
+
+ if (supported == I915_GTT_PAGE_SIZE_4K)
+ return 0;
+
+ /*
+ * Sanity check that the HW behaves with a limited set of combinations.
+ * We already have a bunch of randomised testing, which should give us
+	 * a decent amount of variation between runs; however, we should keep
+	 * this to limit the chances of introducing a temporary regression by
+	 * testing the most obvious cases that might make something blow up.
+ */
+
+ for (i = 0; i < ARRAY_SIZE(backends); ++i) {
+ for (j = 0; j < ARRAY_SIZE(combos); ++j) {
+ struct drm_i915_gem_object *obj;
+ u32 size = combos[j].size;
+ u32 pages = combos[j].pages;
+
+ obj = backends[i].fn(i915, size, backends[i].flags);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ if (err == -ENODEV) {
+ pr_info("Device lacks local memory, skipping\n");
+ err = 0;
+ break;
+ }
+
+ return err;
+ }
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ i915_gem_object_put(obj);
+ goto out;
+ }
+
+ GEM_BUG_ON(pages > obj->base.size);
+ pages = pages & supported;
+
+ if (pages)
+ obj->mm.page_sizes.sg = pages;
+
+ err = igt_write_huge(i915, obj);
+
+ i915_gem_object_lock(obj, NULL);
+ i915_gem_object_unpin_pages(obj);
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_unlock(obj);
+ i915_gem_object_put(obj);
+
+ if (err) {
+ pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n",
+ __func__, size, pages, i, j);
+ goto out;
+ }
+ }
+
+ cond_resched();
+ }
+
+out:
+ if (err == -ENOMEM)
+ err = 0;
+
+ return err;
+}
+
+static int igt_ppgtt_compact(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ /*
+	 * Simple test to catch issues with compact 64K pages -- since the pt
+	 * is compacted to 256B, which gives us 32 entries per pt; however,
+	 * since the backing page for the pt is 4K, any extra entries we might
+	 * incorrectly write out should be ignored by the HW. If we ever hit
+	 * such a case this test should catch it, since some of our writes
+	 * would land in scratch.
+ */
+
+ if (!HAS_64K_PAGES(i915)) {
+ pr_info("device lacks compact 64K page support, skipping\n");
+ return 0;
+ }
+
+ if (!HAS_LMEM(i915)) {
+ pr_info("device lacks LMEM support, skipping\n");
+ return 0;
+ }
+
+ /* We want the range to cover multiple page-table boundaries. */
+ obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err)
+ goto out_put;
+
+ if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
+ pr_info("LMEM compact unable to allocate huge-page(s)\n");
+ goto out_unpin;
+ }
+
+ /*
+ * Disable 2M GTT pages by forcing the page-size to 64K for the GTT
+ * insertion.
+ */
+ obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;
+
+ err = igt_write_huge(i915, obj);
+ if (err)
+ pr_err("LMEM compact write-huge failed\n");
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+out_put:
+ i915_gem_object_put(obj);
+
+ if (err == -ENOMEM)
+ err = 0;
+
+ return err;
+}
+
+static int igt_ppgtt_mixed(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+ struct drm_i915_gem_object *obj, *on;
+ struct i915_gem_engines *engines;
+ struct i915_gem_engines_iter it;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ struct file *file;
+ I915_RND_STATE(prng);
+ LIST_HEAD(objects);
+ struct intel_memory_region *mr;
+ struct i915_vma *vma;
+ unsigned int count;
+ u32 i, addr;
+ int *order;
+ int n, err;
+
+ /*
+ * Sanity check mixing 4K and 64K pages within the same page-table via
+ * the new PS64 TLB hint.
+ */
+
+ if (!HAS_64K_PAGES(i915)) {
+ pr_info("device lacks PS64, skipping\n");
+ return 0;
+ }
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+
+ i = 0;
+ addr = 0;
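+	/*
+	 * Alternate between LMEM and SMEM objects of random size so that 64K
+	 * and 4K entries end up packed into the same page-tables.
+	 */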
+ do {
+ u32 sz;
+
+ sz = i915_prandom_u32_max_state(SZ_4M, &prng);
+ sz = max_t(u32, sz, SZ_4K);
+
+ mr = i915->mm.regions[INTEL_REGION_LMEM_0];
+ if (i & 1)
+ mr = i915->mm.regions[INTEL_REGION_SMEM];
+
+ obj = i915_gem_object_create_region(mr, sz, 0, 0);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_vm;
+ }
+
+ list_add_tail(&obj->st_link, &objects);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err_put;
+ }
+
+ addr = round_up(addr, mr->min_page_size);
+ err = i915_vma_pin(vma, 0, 0, addr | flags);
+ if (err)
+ goto err_put;
+
+ if (mr->type == INTEL_MEMORY_LOCAL &&
+ (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) {
+ err = -EINVAL;
+ goto err_put;
+ }
+
+ addr += obj->base.size;
+ i++;
+ } while (addr <= SZ_16M);
+
+ n = 0;
+ count = 0;
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ count++;
+ if (!intel_engine_can_store_dword(ce->engine))
+ continue;
+
+ n++;
+ }
+ i915_gem_context_unlock_engines(ctx);
+ if (!n)
+ goto err_put;
+
+ order = i915_random_order(count * count, &prng);
+ if (!order) {
+ err = -ENOMEM;
+ goto err_put;
+ }
+
+ i = 0;
+ addr = 0;
+ engines = i915_gem_context_lock_engines(ctx);
+ list_for_each_entry(obj, &objects, st_link) {
+ u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng);
+
+ addr = round_up(addr, obj->mm.region->min_page_size);
+
+ ce = engines->engines[order[i] % engines->num_engines];
+ i = (i + 1) % (count * count);
+ if (!ce || !intel_engine_can_store_dword(ce->engine))
+ continue;
+
+ err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd);
+ if (err)
+ break;
+
+ err = __igt_write_huge(ce, obj, obj->base.size, addr,
+ offset_in_page(rnd) / sizeof(u32), rnd + 1);
+ if (err)
+ break;
+
+ err = __igt_write_huge(ce, obj, obj->base.size, addr,
+ (PAGE_SIZE / sizeof(u32)) - 1,
+ rnd + 2);
+ if (err)
+ break;
+
+ addr += obj->base.size;
+
+ cond_resched();
+ }
+
+ i915_gem_context_unlock_engines(ctx);
+ kfree(order);
+err_put:
+ list_for_each_entry_safe(obj, on, &objects, st_link) {
+ list_del(&obj->st_link);
+ i915_gem_object_put(obj);
+ }
+out_vm:
+ i915_vm_put(vm);
+out:
+ fput(file);
+ return err;
+}
+
+static int igt_tmpfs_fallback(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ struct vfsmount *gemfs = i915->mm.gemfs;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ struct file *file;
+ u32 *vaddr;
+ int err = 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+
+ /*
+ * Make sure that we don't burst into a ball of flames upon falling back
+	 * to tmpfs, which we rely on if, on the off-chance, we encounter a
+	 * failure when setting up gemfs.
+ */
+
+ i915->mm.gemfs = NULL;
+
+ obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_restore;
+ }
+
+ vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out_put;
+ }
+ *vaddr = 0xdeadbeaf;
+
+ __i915_gem_object_flush_map(obj, 0, 64);
+ i915_gem_object_unpin_map(obj);
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_put;
+
+ err = igt_check_page_sizes(vma);
+
+ i915_vma_unpin(vma);
+out_put:
+ i915_gem_object_put(obj);
+out_restore:
+ i915->mm.gemfs = gemfs;
+
+ i915_vm_put(vm);
+out:
+ fput(file);
+ return err;
+}
+
+static int igt_shrink_thp(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ struct drm_i915_gem_object *obj;
+ struct i915_gem_engines_iter it;
+ struct intel_context *ce;
+ struct i915_vma *vma;
+ struct file *file;
+ unsigned int flags = PIN_USER;
+ unsigned int n;
+ intel_wakeref_t wf;
+ bool should_swap;
+ int err;
+
+ if (!igt_can_allocate_thp(i915)) {
+ pr_info("missing THP support, skipping\n");
+ return 0;
+ }
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = hugepage_ctx(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+ vm = i915_gem_context_get_eb_vm(ctx);
+
+ /*
+	 * Sanity check shrinking a huge-paged object -- make sure nothing
+	 * blows up.
+ */
+
+ obj = i915_gem_object_create_shmem(i915, SZ_2M);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out_vm;
+ }
+
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+
+ wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_wf;
+
+ if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
+ pr_info("failed to allocate THP, finishing test early\n");
+ goto out_unpin;
+ }
+
+ err = igt_check_page_sizes(vma);
+ if (err)
+ goto out_unpin;
+
+ n = 0;
+
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ if (!intel_engine_can_store_dword(ce->engine))
+ continue;
+
+ err = gpu_write(ce, vma, n++, 0xdeadbeaf);
+ if (err)
+ break;
+ }
+ i915_gem_context_unlock_engines(ctx);
+ /*
+ * Nuke everything *before* we unpin the pages so we can be reasonably
+	 * sure that when we later check get_nr_swap_pages() some random
+ * leftover object doesn't steal the remaining swap space.
+ */
+ i915_gem_shrink(NULL, i915, -1UL, NULL,
+ I915_SHRINK_BOUND |
+ I915_SHRINK_UNBOUND |
+ I915_SHRINK_ACTIVE);
+ i915_vma_unpin(vma);
+ if (err)
+ goto out_wf;
+
+ /*
+	 * Now that the pages are *unpinned*, shrinking should invoke
+ * shmem to truncate our pages, if we have available swap.
+ */
+ should_swap = get_nr_swap_pages() > 0;
+ i915_gem_shrink(NULL, i915, -1UL, NULL,
+ I915_SHRINK_BOUND |
+ I915_SHRINK_UNBOUND |
+ I915_SHRINK_ACTIVE |
+ I915_SHRINK_WRITEBACK);
+ if (should_swap == i915_gem_object_has_pages(obj)) {
+ pr_err("unexpected pages mismatch, should_swap=%s\n",
+ str_yes_no(should_swap));
+ err = -EINVAL;
+ goto out_wf;
+ }
+
+ if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) {
+ pr_err("unexpected residual page-size bits, should_swap=%s\n",
+ str_yes_no(should_swap));
+ err = -EINVAL;
+ goto out_wf;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, flags);
+ if (err)
+ goto out_wf;
+
+ while (n--) {
+ err = cpu_check(obj, n, 0xdeadbeaf);
+ if (err)
+ break;
+ }
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_wf:
+ intel_runtime_pm_put(&i915->runtime_pm, wf);
+out_put:
+ i915_gem_object_put(obj);
+out_vm:
+ i915_vm_put(vm);
+out:
+ fput(file);
+ return err;
+}
+
+int i915_gem_huge_page_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_mock_exhaust_device_supported_pages),
+ SUBTEST(igt_mock_memory_region_huge_pages),
+ SUBTEST(igt_mock_ppgtt_misaligned_dma),
+ };
+ struct drm_i915_private *dev_priv;
+ struct i915_ppgtt *ppgtt;
+ int err;
+
+ dev_priv = mock_gem_device();
+ if (!dev_priv)
+ return -ENOMEM;
+
+ /* Pretend to be a device which supports the 48b PPGTT */
+ RUNTIME_INFO(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
+ RUNTIME_INFO(dev_priv)->ppgtt_size = 48;
+
+ ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
+ if (IS_ERR(ppgtt)) {
+ err = PTR_ERR(ppgtt);
+ goto out_unlock;
+ }
+
+ if (!i915_vm_is_4lvl(&ppgtt->vm)) {
+ pr_err("failed to create 48b PPGTT\n");
+ err = -EINVAL;
+ goto out_put;
+ }
+
+	/* If we ever hit this then it's time to mock the 64K scratch */
+ if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
+ pr_err("PPGTT missing 64K scratch page\n");
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ err = i915_subtests(tests, ppgtt);
+
+out_put:
+ i915_vm_put(&ppgtt->vm);
+out_unlock:
+ mock_destroy_device(dev_priv);
+ return err;
+}
+
+int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_shrink_thp),
+ SUBTEST(igt_tmpfs_fallback),
+ SUBTEST(igt_ppgtt_smoke_huge),
+ SUBTEST(igt_ppgtt_sanity_check),
+ SUBTEST(igt_ppgtt_compact),
+ SUBTEST(igt_ppgtt_mixed),
+ SUBTEST(igt_ppgtt_huge_fill),
+ SUBTEST(igt_ppgtt_64K),
+ };
+
+ if (!HAS_PPGTT(i915)) {
+ pr_info("PPGTT not supported, skipping live-selftests\n");
+ return 0;
+ }
+
+ if (intel_gt_is_wedged(to_gt(i915)))
+ return 0;
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
new file mode 100644
index 0000000000..ff81af4c82
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2019 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "gt/intel_context.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_engine_user.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_regs.h"
+#include "gem/i915_gem_lmem.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_drm.h"
+#include "selftests/i915_random.h"
+#include "huge_gem_object.h"
+#include "mock_context.h"
+
+#define OW_SIZE 16 /* in bytes */
+#define F_SUBTILE_SIZE 64 /* in bytes */
+#define F_TILE_WIDTH 128 /* in bytes */
+#define F_TILE_HEIGHT 32 /* in pixels */
+#define F_SUBTILE_WIDTH OW_SIZE /* in bytes */
+#define F_SUBTILE_HEIGHT 4 /* in pixels */
+
+static int linear_x_y_to_ftiled_pos(int x, int y, u32 stride, int bpp)
+{
+ int tile_base;
+ int tile_x, tile_y;
+ int swizzle, subtile;
+ int pixel_size = bpp / 8;
+ int pos;
+
+ /*
+ * Subtile remapping for F tile. Note that map[a]==b implies map[b]==a
+ * so we can use the same table to tile and until.
+ */
+ static const u8 f_subtile_map[] = {
+ 0, 1, 2, 3, 8, 9, 10, 11,
+ 4, 5, 6, 7, 12, 13, 14, 15,
+ 16, 17, 18, 19, 24, 25, 26, 27,
+ 20, 21, 22, 23, 28, 29, 30, 31,
+ 32, 33, 34, 35, 40, 41, 42, 43,
+ 36, 37, 38, 39, 44, 45, 46, 47,
+ 48, 49, 50, 51, 56, 57, 58, 59,
+ 52, 53, 54, 55, 60, 61, 62, 63
+ };
+
+ x *= pixel_size;
+ /*
+ * Where does the 4k tile start (in bytes)? This is the same for Y and
+	 * F, so we can use the Y-tile algorithm to get to that point.
+ */
+ tile_base =
+ y / F_TILE_HEIGHT * stride * F_TILE_HEIGHT +
+ x / F_TILE_WIDTH * 4096;
+
+ /* Find pixel within tile */
+ tile_x = x % F_TILE_WIDTH;
+ tile_y = y % F_TILE_HEIGHT;
+
+ /* And figure out the subtile within the 4k tile */
+ subtile = tile_y / F_SUBTILE_HEIGHT * 8 + tile_x / F_SUBTILE_WIDTH;
+
+ /* Swizzle the subtile number according to the bspec diagram */
+ swizzle = f_subtile_map[subtile];
+
+ /* Calculate new position */
+ pos = tile_base +
+ swizzle * F_SUBTILE_SIZE +
+ tile_y % F_SUBTILE_HEIGHT * OW_SIZE +
+ tile_x % F_SUBTILE_WIDTH;
+
+ GEM_BUG_ON(!IS_ALIGNED(pos, pixel_size));
+
+ return pos / pixel_size * 4;
+}
+
+enum client_tiling {
+ CLIENT_TILING_LINEAR,
+ CLIENT_TILING_X,
+ CLIENT_TILING_Y,
+ CLIENT_TILING_4,
+ CLIENT_NUM_TILING_TYPES
+};
+
+#define WIDTH 512
+#define HEIGHT 32
+
+struct blit_buffer {
+ struct i915_vma *vma;
+ u32 start_val;
+ enum client_tiling tiling;
+};
+
+struct tiled_blits {
+ struct intel_context *ce;
+ struct blit_buffer buffers[3];
+ struct blit_buffer scratch;
+ struct i915_vma *batch;
+ u64 hole;
+ u64 align;
+ u32 width;
+ u32 height;
+};
+
+static bool fastblit_supports_x_tiling(const struct drm_i915_private *i915)
+{
+ int gen = GRAPHICS_VER(i915);
+
+ /* XY_FAST_COPY_BLT does not exist on pre-gen9 platforms */
+ drm_WARN_ON(&i915->drm, gen < 9);
+
+ if (gen < 12)
+ return true;
+
+ if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
+ return false;
+
+ return HAS_DISPLAY(i915);
+}
+
+static bool fast_blit_ok(const struct blit_buffer *buf)
+{
+ /* XY_FAST_COPY_BLT does not exist on pre-gen9 platforms */
+ if (GRAPHICS_VER(buf->vma->vm->i915) < 9)
+ return false;
+
+	/* filter out platforms without X-tile support in fastblit */
+ if (buf->tiling == CLIENT_TILING_X && !fastblit_supports_x_tiling(buf->vma->vm->i915))
+ return false;
+
+ return true;
+}
+
+static int prepare_blit(const struct tiled_blits *t,
+ struct blit_buffer *dst,
+ struct blit_buffer *src,
+ struct drm_i915_gem_object *batch)
+{
+ const int ver = GRAPHICS_VER(to_i915(batch->base.dev));
+ bool use_64b_reloc = ver >= 8;
+ u32 src_pitch, dst_pitch;
+ u32 cmd, *cs;
+
+ cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
+ if (IS_ERR(cs))
+ return PTR_ERR(cs);
+
+ if (fast_blit_ok(dst) && fast_blit_ok(src)) {
+ struct intel_gt *gt = t->ce->engine->gt;
+ u32 src_tiles = 0, dst_tiles = 0;
+ u32 src_4t = 0, dst_4t = 0;
+
+		/*
+		 * BLIT_CCTL must be programmed before using XY_FAST_COPY_BLT
+		 * if it has not been done previously.
+		 */
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(BLIT_CCTL(t->ce->engine->mmio_base));
+ *cs++ = (BLIT_CCTL_SRC_MOCS(gt->mocs.uc_index) |
+ BLIT_CCTL_DST_MOCS(gt->mocs.uc_index));
+
+ src_pitch = t->width; /* in dwords */
+ if (src->tiling == CLIENT_TILING_4) {
+ src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(YMAJOR);
+ src_4t = XY_FAST_COPY_BLT_D1_SRC_TILE4;
+ } else if (src->tiling == CLIENT_TILING_Y) {
+ src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(YMAJOR);
+ } else if (src->tiling == CLIENT_TILING_X) {
+ src_tiles = XY_FAST_COPY_BLT_D0_SRC_TILE_MODE(TILE_X);
+ } else {
+ src_pitch *= 4; /* in bytes */
+ }
+
+ dst_pitch = t->width; /* in dwords */
+ if (dst->tiling == CLIENT_TILING_4) {
+ dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(YMAJOR);
+ dst_4t = XY_FAST_COPY_BLT_D1_DST_TILE4;
+ } else if (dst->tiling == CLIENT_TILING_Y) {
+ dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(YMAJOR);
+ } else if (dst->tiling == CLIENT_TILING_X) {
+ dst_tiles = XY_FAST_COPY_BLT_D0_DST_TILE_MODE(TILE_X);
+ } else {
+ dst_pitch *= 4; /* in bytes */
+ }
+
+ *cs++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2) |
+ src_tiles | dst_tiles;
+ *cs++ = src_4t | dst_4t | BLT_DEPTH_32 | dst_pitch;
+ *cs++ = 0;
+ *cs++ = t->height << 16 | t->width;
+ *cs++ = lower_32_bits(i915_vma_offset(dst->vma));
+ *cs++ = upper_32_bits(i915_vma_offset(dst->vma));
+ *cs++ = 0;
+ *cs++ = src_pitch;
+ *cs++ = lower_32_bits(i915_vma_offset(src->vma));
+ *cs++ = upper_32_bits(i915_vma_offset(src->vma));
+ } else {
+ if (ver >= 6) {
+ *cs++ = MI_LOAD_REGISTER_IMM(1);
+ *cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
+ cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
+ if (src->tiling == CLIENT_TILING_Y)
+ cmd |= BCS_SRC_Y;
+ if (dst->tiling == CLIENT_TILING_Y)
+ cmd |= BCS_DST_Y;
+ *cs++ = cmd;
+
+ cmd = MI_FLUSH_DW;
+ if (ver >= 8)
+ cmd++;
+ *cs++ = cmd;
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0;
+ }
+
+ cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
+ if (ver >= 8)
+ cmd += 2;
+
+ src_pitch = t->width * 4;
+ if (src->tiling) {
+ cmd |= XY_SRC_COPY_BLT_SRC_TILED;
+ src_pitch /= 4;
+ }
+
+ dst_pitch = t->width * 4;
+ if (dst->tiling) {
+ cmd |= XY_SRC_COPY_BLT_DST_TILED;
+ dst_pitch /= 4;
+ }
+
+ *cs++ = cmd;
+ *cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
+ *cs++ = 0;
+ *cs++ = t->height << 16 | t->width;
+ *cs++ = lower_32_bits(i915_vma_offset(dst->vma));
+ if (use_64b_reloc)
+ *cs++ = upper_32_bits(i915_vma_offset(dst->vma));
+ *cs++ = 0;
+ *cs++ = src_pitch;
+ *cs++ = lower_32_bits(i915_vma_offset(src->vma));
+ if (use_64b_reloc)
+ *cs++ = upper_32_bits(i915_vma_offset(src->vma));
+ }
+
+ *cs++ = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(batch);
+ i915_gem_object_unpin_map(batch);
+
+ return 0;
+}
+
+static void tiled_blits_destroy_buffers(struct tiled_blits *t)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++)
+ i915_vma_put(t->buffers[i].vma);
+
+ i915_vma_put(t->scratch.vma);
+ i915_vma_put(t->batch);
+}
+
+static struct i915_vma *
+__create_vma(struct tiled_blits *t, size_t size, bool lmem)
+{
+ struct drm_i915_private *i915 = t->ce->vm->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+
+ if (lmem)
+ obj = i915_gem_object_create_lmem(i915, size, 0);
+ else
+ obj = i915_gem_object_create_shmem(i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ vma = i915_vma_instance(obj, t->ce->vm, NULL);
+ if (IS_ERR(vma))
+ i915_gem_object_put(obj);
+
+ return vma;
+}
+
+static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem)
+{
+ return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem);
+}
+
+static int tiled_blits_create_buffers(struct tiled_blits *t,
+ int width, int height,
+ struct rnd_state *prng)
+{
+ struct drm_i915_private *i915 = t->ce->engine->i915;
+ int i;
+
+ t->width = width;
+ t->height = height;
+
+ t->batch = __create_vma(t, PAGE_SIZE, false);
+ if (IS_ERR(t->batch))
+ return PTR_ERR(t->batch);
+
+ t->scratch.vma = create_vma(t, false);
+ if (IS_ERR(t->scratch.vma)) {
+ i915_vma_put(t->batch);
+ return PTR_ERR(t->scratch.vma);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
+ struct i915_vma *vma;
+
+ vma = create_vma(t, HAS_LMEM(i915) && i % 2);
+ if (IS_ERR(vma)) {
+ tiled_blits_destroy_buffers(t);
+ return PTR_ERR(vma);
+ }
+
+ t->buffers[i].vma = vma;
+ t->buffers[i].tiling =
+ i915_prandom_u32_max_state(CLIENT_NUM_TILING_TYPES, prng);
+
+ /* Platforms support either TileY or Tile4, not both */
+ if (HAS_4TILE(i915) && t->buffers[i].tiling == CLIENT_TILING_Y)
+ t->buffers[i].tiling = CLIENT_TILING_4;
+ else if (!HAS_4TILE(i915) && t->buffers[i].tiling == CLIENT_TILING_4)
+ t->buffers[i].tiling = CLIENT_TILING_Y;
+ }
+
+ return 0;
+}
+
+static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
+{
+ int i;
+
+ t->scratch.start_val = val;
+ for (i = 0; i < t->width * t->height; i++)
+ vaddr[i] = val++;
+
+ i915_gem_object_flush_map(t->scratch.vma->obj);
+}
+
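+/* Extract the chosen address bit and shift it down to bit 6 for swizzling */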
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+ return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct intel_gt *gt,
+ u64 v,
+ unsigned int stride,
+ enum client_tiling tiling,
+ int x_pos, int y_pos)
+{
+ unsigned int swizzle;
+ u64 x, y;
+
+ if (tiling == CLIENT_TILING_LINEAR)
+ return v;
+
+ y = div64_u64_rem(v, stride, &x);
+
+ if (tiling == CLIENT_TILING_4) {
+ v = linear_x_y_to_ftiled_pos(x_pos, y_pos, stride, 32);
+
+ /* no swizzling for f-tiling */
+ swizzle = I915_BIT_6_SWIZZLE_NONE;
+ } else if (tiling == CLIENT_TILING_X) {
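+		/* X-tile: 4K tiles of 512-byte rows, 8 rows per tile */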
+ v = div64_u64_rem(y, 8, &y) * stride * 8;
+ v += y * 512;
+ v += div64_u64_rem(x, 512, &x) << 12;
+ v += x;
+
+ swizzle = gt->ggtt->bit_6_swizzle_x;
+ } else {
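+		/* Y-tile: 4K tiles walked in 16-byte-wide, 32-row columns */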
+ const unsigned int ytile_span = 16;
+ const unsigned int ytile_height = 512;
+
+ v = div64_u64_rem(y, 32, &y) * stride * 32;
+ v += y * ytile_span;
+ v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+ v += x;
+
+ swizzle = gt->ggtt->bit_6_swizzle_y;
+ }
+
+ switch (swizzle) {
+ case I915_BIT_6_SWIZZLE_9:
+ v ^= swizzle_bit(9, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+ break;
+ }
+
+ return v;
+}
+
+static const char *repr_tiling(enum client_tiling tiling)
+{
+ switch (tiling) {
+ case CLIENT_TILING_LINEAR: return "linear";
+ case CLIENT_TILING_X: return "X";
+ case CLIENT_TILING_Y: return "Y";
+ case CLIENT_TILING_4: return "F";
+ default: return "unknown";
+ }
+}
+
+static int verify_buffer(const struct tiled_blits *t,
+ struct blit_buffer *buf,
+ struct rnd_state *prng)
+{
+ const u32 *vaddr;
+ int ret = 0;
+ int x, y, p;
+
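+	/* Sample one random pixel and verify it against the expected fill */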
+ x = i915_prandom_u32_max_state(t->width, prng);
+ y = i915_prandom_u32_max_state(t->height, prng);
+ p = y * t->width + x;
+
+ vaddr = i915_gem_object_pin_map_unlocked(buf->vma->obj, I915_MAP_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ if (vaddr[0] != buf->start_val) {
+ ret = -EINVAL;
+ } else {
+ u64 v = tiled_offset(buf->vma->vm->gt,
+ p * 4, t->width * 4,
+ buf->tiling, x, y);
+
+ if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
+ ret = -EINVAL;
+ }
+ if (ret) {
+ pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
+ repr_tiling(buf->tiling),
+ x, y, buf->start_val);
+ igt_hexdump(vaddr, 4096);
+ }
+
+ i915_gem_object_unpin_map(buf->vma->obj);
+ return ret;
+}
+
+static int pin_buffer(struct i915_vma *vma, u64 addr)
+{
+ int err;
+
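+	/* Rebind if the vma is already resident at a different address */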
+ if (drm_mm_node_allocated(&vma->node) && i915_vma_offset(vma) != addr) {
+ err = i915_vma_unbind_unlocked(vma);
+ if (err)
+ return err;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr);
+ if (err)
+ return err;
+
+ GEM_BUG_ON(i915_vma_offset(vma) != addr);
+ return 0;
+}
+
+static int
+tiled_blit(struct tiled_blits *t,
+ struct blit_buffer *dst, u64 dst_addr,
+ struct blit_buffer *src, u64 src_addr)
+{
+ struct i915_request *rq;
+ int err;
+
+ err = pin_buffer(src->vma, src_addr);
+ if (err) {
+ pr_err("Cannot pin src @ %llx\n", src_addr);
+ return err;
+ }
+
+ err = pin_buffer(dst->vma, dst_addr);
+ if (err) {
+ pr_err("Cannot pin dst @ %llx\n", dst_addr);
+ goto err_src;
+ }
+
+ err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH);
+ if (err) {
+ pr_err("cannot pin batch\n");
+ goto err_dst;
+ }
+
+ err = prepare_blit(t, dst, src, t->batch->obj);
+ if (err)
+ goto err_bb;
+
+ rq = intel_context_create_request(t->ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_bb;
+ }
+
+ err = igt_vma_move_to_active_unlocked(t->batch, rq, 0);
+ if (!err)
+ err = igt_vma_move_to_active_unlocked(src->vma, rq, 0);
+ if (!err)
+ err = igt_vma_move_to_active_unlocked(dst->vma, rq, 0);
+ if (!err)
+ err = rq->engine->emit_bb_start(rq,
+ i915_vma_offset(t->batch),
+ i915_vma_size(t->batch),
+ 0);
+ i915_request_get(rq);
+ i915_request_add(rq);
+ if (i915_request_wait(rq, 0, HZ / 2) < 0)
+ err = -ETIME;
+ i915_request_put(rq);
+
+ dst->start_val = src->start_val;
+err_bb:
+ i915_vma_unpin(t->batch);
+err_dst:
+ i915_vma_unpin(dst->vma);
+err_src:
+ i915_vma_unpin(src->vma);
+ return err;
+}
+
+static struct tiled_blits *
+tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
+{
+ struct drm_mm_node hole;
+ struct tiled_blits *t;
+ u64 hole_size;
+ int err;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ return ERR_PTR(-ENOMEM);
+
+ t->ce = intel_context_create(engine);
+ if (IS_ERR(t->ce)) {
+ err = PTR_ERR(t->ce);
+ goto err_free;
+ }
+
+ t->align = i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_LOCAL);
+ t->align = max(t->align,
+ i915_vm_min_alignment(t->ce->vm, INTEL_MEMORY_SYSTEM));
+
+ hole_size = 2 * round_up(WIDTH * HEIGHT * 4, t->align);
+ hole_size *= 2; /* room to maneuver */
+ hole_size += 2 * t->align; /* padding on either side */
+
+ mutex_lock(&t->ce->vm->mutex);
+ memset(&hole, 0, sizeof(hole));
+ err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
+ hole_size, t->align,
+ I915_COLOR_UNEVICTABLE,
+ 0, U64_MAX,
+ DRM_MM_INSERT_BEST);
+ if (!err)
+ drm_mm_remove_node(&hole);
+ mutex_unlock(&t->ce->vm->mutex);
+ if (err) {
+ err = -ENODEV;
+ goto err_put;
+ }
+
+ t->hole = hole.start + t->align;
+ pr_info("Using hole at %llx\n", t->hole);
+
+ err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
+ if (err)
+ goto err_put;
+
+ return t;
+
+err_put:
+ intel_context_put(t->ce);
+err_free:
+ kfree(t);
+ return ERR_PTR(err);
+}
+
+static void tiled_blits_destroy(struct tiled_blits *t)
+{
+ tiled_blits_destroy_buffers(t);
+
+ intel_context_put(t->ce);
+ kfree(t);
+}
+
+static int tiled_blits_prepare(struct tiled_blits *t,
+ struct rnd_state *prng)
+{
+ u64 offset = round_up(t->width * t->height * 4, t->align);
+ u32 *map;
+ int err;
+ int i;
+
+ map = i915_gem_object_pin_map_unlocked(t->scratch.vma->obj, I915_MAP_WC);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ /* Use scratch to fill objects */
+ for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
+ fill_scratch(t, map, prandom_u32_state(prng));
+ GEM_BUG_ON(verify_buffer(t, &t->scratch, prng));
+
+ err = tiled_blit(t,
+ &t->buffers[i], t->hole + offset,
+ &t->scratch, t->hole);
+ if (err == 0)
+ err = verify_buffer(t, &t->buffers[i], prng);
+ if (err) {
+ pr_err("Failed to create buffer %d\n", i);
+ break;
+ }
+ }
+
+ i915_gem_object_unpin_map(t->scratch.vma->obj);
+ return err;
+}
+
+static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
+{
+ u64 offset = round_up(t->width * t->height * 4, 2 * t->align);
+ int err;
+
+ /* We want to check position invariant tiling across GTT eviction */
+
+ err = tiled_blit(t,
+ &t->buffers[1], t->hole + offset / 2,
+ &t->buffers[0], t->hole + 2 * offset);
+ if (err)
+ return err;
+
+ /* Simulating GTT eviction of the same buffer / layout */
+ t->buffers[2].tiling = t->buffers[0].tiling;
+
+	/* Reposition so that we overlap the old addresses, but slightly offset */
+ err = tiled_blit(t,
+ &t->buffers[2], t->hole + t->align,
+ &t->buffers[1], t->hole + 3 * offset / 2);
+ if (err)
+ return err;
+
+ err = verify_buffer(t, &t->buffers[2], prng);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int __igt_client_tiled_blits(struct intel_engine_cs *engine,
+ struct rnd_state *prng)
+{
+ struct tiled_blits *t;
+ int err;
+
+ t = tiled_blits_create(engine, prng);
+ if (IS_ERR(t))
+ return PTR_ERR(t);
+
+ err = tiled_blits_prepare(t, prng);
+ if (err)
+ goto out;
+
+ err = tiled_blits_bounce(t, prng);
+ if (err)
+ goto out;
+
+out:
+ tiled_blits_destroy(t);
+ return err;
+}
+
+static bool has_bit17_swizzle(int sw)
+{
+ return (sw == I915_BIT_6_SWIZZLE_9_10_17 ||
+ sw == I915_BIT_6_SWIZZLE_9_17);
+}
+
+static bool bad_swizzling(struct drm_i915_private *i915)
+{
+ struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
+
+ if (i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
+ return true;
+
+ if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) ||
+ has_bit17_swizzle(ggtt->bit_6_swizzle_y))
+ return true;
+
+ return false;
+}
+
+static int igt_client_tiled_blits(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ I915_RND_STATE(prng);
+ int inst = 0;
+
+ /* Test requires explicit BLT tiling controls */
+ if (GRAPHICS_VER(i915) < 4)
+ return 0;
+
+ if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */
+ return 0;
+
+ do {
+ struct intel_engine_cs *engine;
+ int err;
+
+ engine = intel_engine_lookup_user(i915,
+ I915_ENGINE_CLASS_COPY,
+ inst++);
+ if (!engine)
+ return 0;
+
+ err = __igt_client_tiled_blits(engine, &prng);
+ if (err == -ENODEV)
+ err = 0;
+ if (err)
+ return err;
+ } while (1);
+}
+
+int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_client_tiled_blits),
+ };
+
+ if (intel_gt_is_wedged(to_gt(i915)))
+ return 0;
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
new file mode 100644
index 0000000000..3bef1beec7
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c
@@ -0,0 +1,436 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_ring.h"
+
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+
+struct context {
+ struct drm_i915_gem_object *obj;
+ struct intel_engine_cs *engine;
+};
+
+static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
+{
+ unsigned int needs_clflush;
+ struct page *page;
+ void *map;
+ u32 *cpu;
+ int err;
+
+ i915_gem_object_lock(ctx->obj, NULL);
+ err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
+ if (err)
+ goto out;
+
+ page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
+ map = kmap_atomic(page);
+ cpu = map + offset_in_page(offset);
+
+ if (needs_clflush & CLFLUSH_BEFORE)
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+
+ *cpu = v;
+
+ if (needs_clflush & CLFLUSH_AFTER)
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+
+ kunmap_atomic(map);
+ i915_gem_object_finish_access(ctx->obj);
+
+out:
+ i915_gem_object_unlock(ctx->obj);
+ return err;
+}
+
+static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
+{
+ unsigned int needs_clflush;
+ struct page *page;
+ void *map;
+ u32 *cpu;
+ int err;
+
+ i915_gem_object_lock(ctx->obj, NULL);
+ err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
+ if (err)
+ goto out;
+
+ page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
+ map = kmap_atomic(page);
+ cpu = map + offset_in_page(offset);
+
+ if (needs_clflush & CLFLUSH_BEFORE)
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+
+ *v = *cpu;
+
+ kunmap_atomic(map);
+ i915_gem_object_finish_access(ctx->obj);
+
+out:
+ i915_gem_object_unlock(ctx->obj);
+ return err;
+}
+
+static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
+{
+ struct i915_vma *vma;
+ u32 __iomem *map;
+ int err = 0;
+
+ i915_gem_object_lock(ctx->obj, NULL);
+ err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+ i915_gem_object_unlock(ctx->obj);
+ if (err)
+ return err;
+
+ vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ intel_gt_pm_get(vma->vm->gt);
+
+ map = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto out_rpm;
+ }
+
+ iowrite32(v, &map[offset / sizeof(*map)]);
+ i915_vma_unpin_iomap(vma);
+
+out_rpm:
+ intel_gt_pm_put(vma->vm->gt);
+ return err;
+}
+
+static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
+{
+ struct i915_vma *vma;
+ u32 __iomem *map;
+ int err = 0;
+
+ i915_gem_object_lock(ctx->obj, NULL);
+ err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
+ i915_gem_object_unlock(ctx->obj);
+ if (err)
+ return err;
+
+ vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ intel_gt_pm_get(vma->vm->gt);
+
+ map = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto out_rpm;
+ }
+
+ *v = ioread32(&map[offset / sizeof(*map)]);
+ i915_vma_unpin_iomap(vma);
+
+out_rpm:
+ intel_gt_pm_put(vma->vm->gt);
+ return err;
+}
+
+static int wc_set(struct context *ctx, unsigned long offset, u32 v)
+{
+ u32 *map;
+ int err;
+
+ i915_gem_object_lock(ctx->obj, NULL);
+ err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
+ i915_gem_object_unlock(ctx->obj);
+ if (err)
+ return err;
+
+ map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ map[offset / sizeof(*map)] = v;
+
+ __i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
+ i915_gem_object_unpin_map(ctx->obj);
+
+ return 0;
+}
+
+static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
+{
+ u32 *map;
+ int err;
+
+ i915_gem_object_lock(ctx->obj, NULL);
+ err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
+ i915_gem_object_unlock(ctx->obj);
+ if (err)
+ return err;
+
+ map = i915_gem_object_pin_map_unlocked(ctx->obj, I915_MAP_WC);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ *v = map[offset / sizeof(*map)];
+ i915_gem_object_unpin_map(ctx->obj);
+
+ return 0;
+}
+
+static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
+{
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cs;
+ int err;
+
+ vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ i915_gem_object_lock(ctx->obj, NULL);
+ err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
+ if (err)
+ goto out_unlock;
+
+ rq = intel_engine_create_kernel_request(ctx->engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unpin;
+ }
+
+ cs = intel_ring_begin(rq, 4);
+ if (IS_ERR(cs)) {
+ err = PTR_ERR(cs);
+ goto out_rq;
+ }
+
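+	/* Emit a store-dword using the addressing layout for this gen */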
+ if (GRAPHICS_VER(ctx->engine->i915) >= 8) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
+ *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
+ *cs++ = v;
+ } else if (GRAPHICS_VER(ctx->engine->i915) >= 4) {
+ *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
+ *cs++ = 0;
+ *cs++ = i915_ggtt_offset(vma) + offset;
+ *cs++ = v;
+ } else {
+ *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *cs++ = i915_ggtt_offset(vma) + offset;
+ *cs++ = v;
+ *cs++ = MI_NOOP;
+ }
+ intel_ring_advance(rq, cs);
+
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+
+out_rq:
+ i915_request_add(rq);
+out_unpin:
+ i915_vma_unpin(vma);
+out_unlock:
+ i915_gem_object_unlock(ctx->obj);
+
+ return err;
+}
+
+static bool always_valid(struct context *ctx)
+{
+ return true;
+}
+
+static bool needs_fence_registers(struct context *ctx)
+{
+ struct intel_gt *gt = ctx->engine->gt;
+
+ if (intel_gt_is_wedged(gt))
+ return false;
+
+ return gt->ggtt->num_fences;
+}
+
+static bool needs_mi_store_dword(struct context *ctx)
+{
+ if (intel_gt_is_wedged(ctx->engine->gt))
+ return false;
+
+ return intel_engine_can_store_dword(ctx->engine);
+}
+
+static const struct igt_coherency_mode {
+ const char *name;
+ int (*set)(struct context *ctx, unsigned long offset, u32 v);
+ int (*get)(struct context *ctx, unsigned long offset, u32 *v);
+ bool (*valid)(struct context *ctx);
+} igt_coherency_mode[] = {
+ { "cpu", cpu_set, cpu_get, always_valid },
+ { "gtt", gtt_set, gtt_get, needs_fence_registers },
+ { "wc", wc_set, wc_get, always_valid },
+ { "gpu", gpu_set, NULL, needs_mi_store_dword },
+ { },
+};
+
+static struct intel_engine_cs *
+random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
+{
+ struct intel_engine_cs *engine;
+ unsigned int count;
+
+ count = 0;
+ for_each_uabi_engine(engine, i915)
+ count++;
+
+ count = i915_prandom_u32_max_state(count, prng);
+ for_each_uabi_engine(engine, i915)
+ if (count-- == 0)
+ return engine;
+
+ return NULL;
+}
+
+static int igt_gem_coherency(void *arg)
+{
+ const unsigned int ncachelines = PAGE_SIZE/64;
+ struct drm_i915_private *i915 = arg;
+ const struct igt_coherency_mode *read, *write, *over;
+ unsigned long count, n;
+ u32 *offsets, *values;
+ I915_RND_STATE(prng);
+ struct context ctx;
+ int err = 0;
+
+ /*
+ * We repeatedly write, overwrite and read from a sequence of
+ * cachelines in order to try and detect incoherency (unflushed writes
+ * from either the CPU or GPU). Each setter/getter uses our cache
+ * domain API which should prevent incoherency.
+ */
+
+ offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
+ if (!offsets)
+ return -ENOMEM;
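+	/* One dword per cacheline, staggered within each 64-byte line */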
+ for (count = 0; count < ncachelines; count++)
+ offsets[count] = count * 64 + 4 * (count % 16);
+
+ values = offsets + ncachelines;
+
+ ctx.engine = random_engine(i915, &prng);
+ if (!ctx.engine) {
+ err = -ENODEV;
+ goto out_free;
+ }
+ pr_info("%s: using %s\n", __func__, ctx.engine->name);
+ intel_engine_pm_get(ctx.engine);
+
+ for (over = igt_coherency_mode; over->name; over++) {
+ if (!over->set)
+ continue;
+
+ if (!over->valid(&ctx))
+ continue;
+
+ for (write = igt_coherency_mode; write->name; write++) {
+ if (!write->set)
+ continue;
+
+ if (!write->valid(&ctx))
+ continue;
+
+ for (read = igt_coherency_mode; read->name; read++) {
+ if (!read->get)
+ continue;
+
+ if (!read->valid(&ctx))
+ continue;
+
+ for_each_prime_number_from(count, 1, ncachelines) {
+ ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(ctx.obj)) {
+ err = PTR_ERR(ctx.obj);
+ goto out_pm;
+ }
+
+ i915_random_reorder(offsets, ncachelines, &prng);
+ for (n = 0; n < count; n++)
+ values[n] = prandom_u32_state(&prng);
+
+ for (n = 0; n < count; n++) {
+ err = over->set(&ctx, offsets[n], ~values[n]);
+ if (err) {
+ pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
+ n, count, over->name, err);
+ goto put_object;
+ }
+ }
+
+ for (n = 0; n < count; n++) {
+ err = write->set(&ctx, offsets[n], values[n]);
+ if (err) {
+ pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
+ n, count, write->name, err);
+ goto put_object;
+ }
+ }
+
+ for (n = 0; n < count; n++) {
+ u32 found;
+
+ err = read->get(&ctx, offsets[n], &found);
+ if (err) {
+ pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
+ n, count, read->name, err);
+ goto put_object;
+ }
+
+ if (found != values[n]) {
+ pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
+ n, count, over->name,
+ write->name, values[n],
+ read->name, found,
+ ~values[n], offsets[n]);
+ err = -EINVAL;
+ goto put_object;
+ }
+ }
+
+ i915_gem_object_put(ctx.obj);
+ }
+ }
+ }
+ }
+out_pm:
+ intel_engine_pm_put(ctx.engine);
+out_free:
+ kfree(offsets);
+ return err;
+
+put_object:
+ i915_gem_object_put(ctx.obj);
+ goto out_pm;
+}
+
+int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_gem_coherency),
+ };
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
new file mode 100644
index 0000000000..7021b6e9b2
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c
@@ -0,0 +1,1915 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2017 Intel Corporation
+ */
+
+#include <linux/prime_numbers.h>
+#include <linux/string_helpers.h>
+
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_engine_regs.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_requests.h"
+#include "gt/intel_reset.h"
+#include "i915_selftest.h"
+
+#include "gem/selftests/igt_gem_utils.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_live_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
+
+#include "huge_gem_object.h"
+#include "igt_gem_utils.h"
+
+#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
+
+static int live_nop_switch(void *arg)
+{
+ const unsigned int nctx = 1024;
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ struct i915_gem_context **ctx;
+ struct igt_live_test t;
+ struct file *file;
+ unsigned long n;
+ int err = -ENODEV;
+
+ /*
+ * Create as many contexts as we can feasibly get away with
+ * and check we can switch between them rapidly.
+ *
+ * This serves as a very simple stress test for submission and HW
+ * switching between contexts.
+ */
+
+ if (!DRIVER_CAPS(i915)->has_logical_contexts)
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out_file;
+ }
+
+ for (n = 0; n < nctx; n++) {
+ ctx[n] = live_context(i915, file);
+ if (IS_ERR(ctx[n])) {
+ err = PTR_ERR(ctx[n]);
+ goto out_ctx;
+ }
+ }
+
+ for_each_uabi_engine(engine, i915) {
+ struct i915_request *rq = NULL;
+ unsigned long end_time, prime;
+ ktime_t times[2] = {};
+
+ times[0] = ktime_get_raw();
+ for (n = 0; n < nctx; n++) {
+ struct i915_request *this;
+
+ this = igt_request_alloc(ctx[n], engine);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
+ goto out_ctx;
+ }
+ if (rq) {
+ i915_request_await_dma_fence(this, &rq->fence);
+ i915_request_put(rq);
+ }
+ rq = i915_request_get(this);
+ i915_request_add(this);
+ }
+ if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
+ pr_err("Failed to populate %d contexts\n", nctx);
+ intel_gt_set_wedged(engine->gt);
+ i915_request_put(rq);
+ err = -EIO;
+ goto out_ctx;
+ }
+ i915_request_put(rq);
+
+ times[1] = ktime_get_raw();
+
+ pr_info("Populated %d contexts on %s in %lluns\n",
+ nctx, engine->name, ktime_to_ns(times[1] - times[0]));
+
+ err = igt_live_test_begin(&t, i915, __func__, engine->name);
+ if (err)
+ goto out_ctx;
+
+ end_time = jiffies + i915_selftest.timeout_jiffies;
+ for_each_prime_number_from(prime, 2, 8192) {
+ times[1] = ktime_get_raw();
+
+ rq = NULL;
+ for (n = 0; n < prime; n++) {
+ struct i915_request *this;
+
+ this = igt_request_alloc(ctx[n % nctx], engine);
+ if (IS_ERR(this)) {
+ err = PTR_ERR(this);
+ goto out_ctx;
+ }
+
+ if (rq) { /* Force submission order */
+ i915_request_await_dma_fence(this, &rq->fence);
+ i915_request_put(rq);
+ }
+
+ /*
+ * This space is left intentionally blank.
+ *
+ * We do not actually want to perform any
+ * action with this request, we just want
+ * to measure the latency in allocation
+ * and submission of our breadcrumbs -
+ * ensuring that the bare request is sufficient
+ * for the system to work (i.e. proper HEAD
+ * tracking of the rings, interrupt handling,
+ * etc). It also gives us the lowest bounds
+ * for latency.
+ */
+
+ rq = i915_request_get(this);
+ i915_request_add(this);
+ }
+ GEM_BUG_ON(!rq);
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ pr_err("Switching between %ld contexts timed out\n",
+ prime);
+ intel_gt_set_wedged(engine->gt);
+ i915_request_put(rq);
+ break;
+ }
+ i915_request_put(rq);
+
+ times[1] = ktime_sub(ktime_get_raw(), times[1]);
+ if (prime == 2)
+ times[0] = times[1];
+
+ if (__igt_timeout(end_time, NULL))
+ break;
+ }
+
+ err = igt_live_test_end(&t);
+ if (err)
+ goto out_ctx;
+
+ pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
+ engine->name,
+ ktime_to_ns(times[0]),
+ prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
+ }
+
+out_ctx:
+ kfree(ctx);
+out_file:
+ fput(file);
+ return err;
+}
+
+struct parallel_switch {
+ struct kthread_worker *worker;
+ struct kthread_work work;
+ struct intel_context *ce[2];
+ int result;
+};
+
+static void __live_parallel_switch1(struct kthread_work *work)
+{
+ struct parallel_switch *arg =
+ container_of(work, typeof(*arg), work);
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+
+ count = 0;
+ arg->result = 0;
+ do {
+ struct i915_request *rq = NULL;
+ int n;
+
+ for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
+ struct i915_request *prev = rq;
+
+ rq = i915_request_create(arg->ce[n]);
+ if (IS_ERR(rq)) {
+ i915_request_put(prev);
+ arg->result = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_get(rq);
+ if (prev) {
+ arg->result =
+ i915_request_await_dma_fence(rq,
+ &prev->fence);
+ i915_request_put(prev);
+ }
+
+ i915_request_add(rq);
+ }
+
+ if (IS_ERR_OR_NULL(rq))
+ break;
+
+ if (i915_request_wait(rq, 0, HZ) < 0)
+ arg->result = -ETIME;
+
+ i915_request_put(rq);
+
+ count++;
+ } while (!arg->result && !__igt_timeout(end_time, NULL));
+
+ pr_info("%s: %lu switches (sync) <%d>\n",
+ arg->ce[0]->engine->name, count, arg->result);
+}
+
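+/*
+ * Unlike __live_parallel_switch1() above, which waits for each chain of
+ * requests to complete before starting the next, this variant keeps queueing
+ * requests across its contexts without ever waiting, measuring how many
+ * back-to-back context switches can be kept in flight within the timeout.
+ */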
+static void __live_parallel_switchN(struct kthread_work *work)
+{
+ struct parallel_switch *arg =
+ container_of(work, typeof(*arg), work);
+ struct i915_request *rq = NULL;
+ IGT_TIMEOUT(end_time);
+ unsigned long count;
+ int n;
+
+ count = 0;
+ arg->result = 0;
+ do {
+ for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
+ struct i915_request *prev = rq;
+
+ rq = i915_request_create(arg->ce[n]);
+ if (IS_ERR(rq)) {
+ i915_request_put(prev);
+ arg->result = PTR_ERR(rq);
+ break;
+ }
+
+ i915_request_get(rq);
+ if (prev) {
+ arg->result =
+ i915_request_await_dma_fence(rq,
+ &prev->fence);
+ i915_request_put(prev);
+ }
+
+ i915_request_add(rq);
+ }
+
+ count++;
+ } while (!arg->result && !__igt_timeout(end_time, NULL));
+
+ if (!IS_ERR_OR_NULL(rq))
+ i915_request_put(rq);
+
+ pr_info("%s: %lu switches (many) <%d>\n",
+ arg->ce[0]->engine->name, count, arg->result);
+}
+
+static int live_parallel_switch(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ static void (* const func[])(struct kthread_work *) = {
+ __live_parallel_switch1,
+ __live_parallel_switchN,
+ NULL,
+ };
+ struct parallel_switch *data = NULL;
+ struct i915_gem_engines *engines;
+ struct i915_gem_engines_iter it;
+ void (* const *fn)(struct kthread_work *);
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ struct file *file;
+ int n, m, count;
+ int err = 0;
+
+ /*
+ * Check we can process switches on all engines simultaneously.
+ */
+
+ if (!DRIVER_CAPS(i915)->has_logical_contexts)
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = live_context(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out_file;
+ }
+
+ engines = i915_gem_context_lock_engines(ctx);
+ count = engines->num_engines;
+
+ data = kcalloc(count, sizeof(*data), GFP_KERNEL);
+ if (!data) {
+ i915_gem_context_unlock_engines(ctx);
+ err = -ENOMEM;
+ goto out_file;
+ }
+
+ m = 0; /* Use the first context as our template for the engines */
+ for_each_gem_engine(ce, engines, it) {
+ err = intel_context_pin(ce);
+ if (err) {
+ i915_gem_context_unlock_engines(ctx);
+ goto out;
+ }
+ data[m++].ce[0] = intel_context_get(ce);
+ }
+ i915_gem_context_unlock_engines(ctx);
+
+ /* Clone the same set of engines into the other contexts */
+ for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
+ ctx = live_context(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out;
+ }
+
+ for (m = 0; m < count; m++) {
+ if (!data[m].ce[0])
+ continue;
+
+ ce = intel_context_create(data[m].ce[0]->engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out;
+ }
+
+ err = intel_context_pin(ce);
+ if (err) {
+ intel_context_put(ce);
+ goto out;
+ }
+
+ data[m].ce[n] = ce;
+ }
+ }
+
+ for (n = 0; n < count; n++) {
+ struct kthread_worker *worker;
+
+ if (!data[n].ce[0])
+ continue;
+
+ worker = kthread_create_worker(0, "igt/parallel:%s",
+ data[n].ce[0]->engine->name);
+ if (IS_ERR(worker)) {
+ err = PTR_ERR(worker);
+ goto out;
+ }
+
+ data[n].worker = worker;
+ }
+
+ for (fn = func; !err && *fn; fn++) {
+ struct igt_live_test t;
+
+ err = igt_live_test_begin(&t, i915, __func__, "");
+ if (err)
+ break;
+
+ for (n = 0; n < count; n++) {
+ if (!data[n].ce[0])
+ continue;
+
+ data[n].result = 0;
+ kthread_init_work(&data[n].work, *fn);
+ kthread_queue_work(data[n].worker, &data[n].work);
+ }
+
+ for (n = 0; n < count; n++) {
+ if (data[n].ce[0]) {
+ kthread_flush_work(&data[n].work);
+ if (data[n].result && !err)
+ err = data[n].result;
+ }
+ }
+
+ if (igt_live_test_end(&t)) {
+ err = err ?: -EIO;
+ break;
+ }
+ }
+
+out:
+ for (n = 0; n < count; n++) {
+ for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
+ if (!data[n].ce[m])
+ continue;
+
+ intel_context_unpin(data[n].ce[m]);
+ intel_context_put(data[n].ce[m]);
+ }
+
+ if (data[n].worker)
+ kthread_destroy_worker(data[n].worker);
+ }
+ kfree(data);
+out_file:
+ fput(file);
+ return err;
+}
+
+static unsigned long real_page_count(struct drm_i915_gem_object *obj)
+{
+ return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
+}
+
+static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
+{
+ return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
+}
+
+static int gpu_fill(struct intel_context *ce,
+ struct drm_i915_gem_object *obj,
+ unsigned int dw)
+{
+ struct i915_vma *vma;
+ int err;
+
+ GEM_BUG_ON(obj->base.size > ce->vm->total);
+ GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
+ if (err)
+ return err;
+
+ /*
+ * Within the GTT the huge object maps every page onto
+ * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
+ * We set the nth dword within the page using the nth
+ * mapping via the GTT - this should exercise the GTT mapping
+ * whilst checking that each context provides a unique view
+ * into the object.
+ */
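+ /*
+ * For illustration (assuming igt_gpu_fill_dw() advances its stores one
+ * page at a time): on pass dw, with N real pages, the batch writes the
+ * value dw into dword index dw of GTT pages dw*N .. dw*N + N - 1, which
+ * alias back onto physical pages 0 .. N - 1, so each physical page ends
+ * up with page[dw] == dw ready for cpu_check() later.
+ */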
+ err = igt_gpu_fill_dw(ce, vma,
+ (dw * real_page_count(obj)) << PAGE_SHIFT |
+ (dw * sizeof(u32)),
+ real_page_count(obj),
+ dw);
+ i915_vma_unpin(vma);
+
+ return err;
+}
+
+static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
+{
+ const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
+ unsigned int need_flush;
+ unsigned long n, m;
+ int err;
+
+ i915_gem_object_lock(obj, NULL);
+ err = i915_gem_object_prepare_write(obj, &need_flush);
+ if (err)
+ goto out;
+
+ for (n = 0; n < real_page_count(obj); n++) {
+ u32 *map;
+
+ map = kmap_atomic(i915_gem_object_get_page(obj, n));
+ for (m = 0; m < DW_PER_PAGE; m++)
+ map[m] = value;
+ if (!has_llc)
+ drm_clflush_virt_range(map, PAGE_SIZE);
+ kunmap_atomic(map);
+ }
+
+ i915_gem_object_finish_access(obj);
+ obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
+ obj->write_domain = 0;
+out:
+ i915_gem_object_unlock(obj);
+ return err;
+}
+
+static noinline int cpu_check(struct drm_i915_gem_object *obj,
+ unsigned int idx, unsigned int max)
+{
+ unsigned int needs_flush;
+ unsigned long n;
+ int err;
+
+ i915_gem_object_lock(obj, NULL);
+ err = i915_gem_object_prepare_read(obj, &needs_flush);
+ if (err)
+ goto out_unlock;
+
+ for (n = 0; n < real_page_count(obj); n++) {
+ u32 *map, m;
+
+ map = kmap_atomic(i915_gem_object_get_page(obj, n));
+ if (needs_flush & CLFLUSH_BEFORE)
+ drm_clflush_virt_range(map, PAGE_SIZE);
+
+ for (m = 0; m < max; m++) {
+ if (map[m] != m) {
+ pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n",
+ __builtin_return_address(0), idx,
+ n, real_page_count(obj), m, max,
+ map[m], m);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+ }
+
+ for (; m < DW_PER_PAGE; m++) {
+ if (map[m] != STACK_MAGIC) {
+ pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n",
+ __builtin_return_address(0), idx, n, m,
+ map[m], STACK_MAGIC);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+ }
+
+out_unmap:
+ kunmap_atomic(map);
+ if (err)
+ break;
+ }
+
+ i915_gem_object_finish_access(obj);
+out_unlock:
+ i915_gem_object_unlock(obj);
+ return err;
+}
+
+static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
+{
+ int err;
+
+ GEM_BUG_ON(obj->base.handle_count);
+
+ /* tie the object to the drm_file for easy reaping */
+ err = idr_alloc(&to_drm_file(file)->object_idr,
+ &obj->base, 1, 0, GFP_KERNEL);
+ if (err < 0)
+ return err;
+
+ i915_gem_object_get(obj);
+ obj->base.handle_count++;
+ return 0;
+}
+
+static struct drm_i915_gem_object *
+create_test_object(struct i915_address_space *vm,
+ struct file *file,
+ struct list_head *objects)
+{
+ struct drm_i915_gem_object *obj;
+ u64 size;
+ int err;
+
+ /* Keep in GEM's good graces */
+ intel_gt_retire_requests(vm->gt);
+
+ size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
+ size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
+
+ obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
+ if (IS_ERR(obj))
+ return obj;
+
+ err = file_add_object(file, obj);
+ i915_gem_object_put(obj);
+ if (err)
+ return ERR_PTR(err);
+
+ err = cpu_fill(obj, STACK_MAGIC);
+ if (err) {
+ pr_err("Failed to fill object with cpu, err=%d\n",
+ err);
+ return ERR_PTR(err);
+ }
+
+ list_add_tail(&obj->st_link, objects);
+ return obj;
+}
+
+static unsigned long max_dwords(struct drm_i915_gem_object *obj)
+{
+ unsigned long npages = fake_page_count(obj);
+
+ GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
+ return npages / DW_PER_PAGE;
+}
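+/*
+ * The test objects come from huge_gem_object() with only DW_PER_PAGE
+ * physical pages backing a much larger fake GTT size, so max_dwords() above
+ * is the number of gpu_fill() passes an object can absorb: each pass uses
+ * one window of DW_PER_PAGE fake pages and one dword slot in every physical
+ * page.
+ */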
+
+static void throttle_release(struct i915_request **q, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if (IS_ERR_OR_NULL(q[i]))
+ continue;
+
+ i915_request_put(fetch_and_zero(&q[i]));
+ }
+}
+
+static int throttle(struct intel_context *ce,
+ struct i915_request **q, int count)
+{
+ int i;
+
+ if (!IS_ERR_OR_NULL(q[0])) {
+ if (i915_request_wait(q[0],
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT) < 0)
+ return -EINTR;
+
+ i915_request_put(q[0]);
+ }
+
+ for (i = 0; i < count - 1; i++)
+ q[i] = q[i + 1];
+
+ q[i] = intel_context_create_request(ce);
+ if (IS_ERR(q[i]))
+ return PTR_ERR(q[i]);
+
+ i915_request_get(q[i]);
+ i915_request_add(q[i]);
+
+ return 0;
+}
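+/*
+ * throttle() above keeps a small sliding window of empty barrier requests
+ * per context: each call waits for the oldest entry, shifts the queue down
+ * and appends a fresh request, so the submission loops can never run more
+ * than ARRAY_SIZE(tq) steps ahead of the hardware.
+ */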
+
+static int igt_ctx_exec(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_engine_cs *engine;
+ int err = -ENODEV;
+
+ /*
+ * Create a few different contexts (with different mm) and write
+ * through each ctx/mm using the GPU, making sure those writes end
+ * up in the expected pages of our obj.
+ */
+
+ if (!DRIVER_CAPS(i915)->has_logical_contexts)
+ return 0;
+
+ for_each_uabi_engine(engine, i915) {
+ struct drm_i915_gem_object *obj = NULL;
+ unsigned long ncontexts, ndwords, dw;
+ struct i915_request *tq[5] = {};
+ struct igt_live_test t;
+ IGT_TIMEOUT(end_time);
+ LIST_HEAD(objects);
+ struct file *file;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ if (!engine->context_size)
+ continue; /* No logical context support in HW */
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ err = igt_live_test_begin(&t, i915, __func__, engine->name);
+ if (err)
+ goto out_file;
+
+ ncontexts = 0;
+ ndwords = 0;
+ dw = 0;
+ while (!time_after(jiffies, end_time)) {
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+
+ ctx = kernel_context(i915, NULL);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out_file;
+ }
+
+ ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+ GEM_BUG_ON(IS_ERR(ce));
+
+ if (!obj) {
+ obj = create_test_object(ce->vm, file, &objects);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_file;
+ }
+ }
+
+ err = gpu_fill(ce, obj, dw);
+ if (err) {
+ pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
+ ndwords, dw, max_dwords(obj),
+ engine->name,
+ str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
+ err);
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_file;
+ }
+
+ err = throttle(ce, tq, ARRAY_SIZE(tq));
+ if (err) {
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_file;
+ }
+
+ if (++dw == max_dwords(obj)) {
+ obj = NULL;
+ dw = 0;
+ }
+
+ ndwords++;
+ ncontexts++;
+
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ }
+
+ pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+ ncontexts, engine->name, ndwords);
+
+ ncontexts = dw = 0;
+ list_for_each_entry(obj, &objects, st_link) {
+ unsigned int rem =
+ min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+ err = cpu_check(obj, ncontexts++, rem);
+ if (err)
+ break;
+
+ dw += rem;
+ }
+
+out_file:
+ throttle_release(tq, ARRAY_SIZE(tq));
+ if (igt_live_test_end(&t))
+ err = -EIO;
+
+ fput(file);
+ if (err)
+ return err;
+
+ i915_gem_drain_freed_objects(i915);
+ }
+
+ return 0;
+}
+
+static int igt_shared_ctx_exec(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_request *tq[5] = {};
+ struct i915_gem_context *parent;
+ struct intel_engine_cs *engine;
+ struct igt_live_test t;
+ struct file *file;
+ int err = 0;
+
+ /*
+ * Create a few different contexts with the same mm and write
+ * through each ctx using the GPU, making sure those writes end
+ * up in the expected pages of our obj.
+ */
+ if (!DRIVER_CAPS(i915)->has_logical_contexts)
+ return 0;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ parent = live_context(i915, file);
+ if (IS_ERR(parent)) {
+ err = PTR_ERR(parent);
+ goto out_file;
+ }
+
+ if (!parent->vm) { /* not full-ppgtt; nothing to share */
+ err = 0;
+ goto out_file;
+ }
+
+ err = igt_live_test_begin(&t, i915, __func__, "");
+ if (err)
+ goto out_file;
+
+ for_each_uabi_engine(engine, i915) {
+ unsigned long ncontexts, ndwords, dw;
+ struct drm_i915_gem_object *obj = NULL;
+ IGT_TIMEOUT(end_time);
+ LIST_HEAD(objects);
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ dw = 0;
+ ndwords = 0;
+ ncontexts = 0;
+ while (!time_after(jiffies, end_time)) {
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+
+ ctx = kernel_context(i915, parent->vm);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out_test;
+ }
+
+ ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+ GEM_BUG_ON(IS_ERR(ce));
+
+ if (!obj) {
+ obj = create_test_object(parent->vm,
+ file, &objects);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_test;
+ }
+ }
+
+ err = gpu_fill(ce, obj, dw);
+ if (err) {
+ pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
+ ndwords, dw, max_dwords(obj),
+ engine->name,
+ str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
+ err);
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_test;
+ }
+
+ err = throttle(ce, tq, ARRAY_SIZE(tq));
+ if (err) {
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ goto out_test;
+ }
+
+ if (++dw == max_dwords(obj)) {
+ obj = NULL;
+ dw = 0;
+ }
+
+ ndwords++;
+ ncontexts++;
+
+ intel_context_put(ce);
+ kernel_context_close(ctx);
+ }
+ pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
+ ncontexts, engine->name, ndwords);
+
+ ncontexts = dw = 0;
+ list_for_each_entry(obj, &objects, st_link) {
+ unsigned int rem =
+ min_t(unsigned int, ndwords - dw, max_dwords(obj));
+
+ err = cpu_check(obj, ncontexts++, rem);
+ if (err)
+ goto out_test;
+
+ dw += rem;
+ }
+
+ i915_gem_drain_freed_objects(i915);
+ }
+out_test:
+ throttle_release(tq, ARRAY_SIZE(tq));
+ if (igt_live_test_end(&t))
+ err = -EIO;
+out_file:
+ fput(file);
+ return err;
+}
+
+static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
+ struct i915_vma *vma,
+ struct intel_engine_cs *engine)
+{
+ u32 *cmd;
+
+ GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
+
+ cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
+ if (IS_ERR(cmd))
+ return PTR_ERR(cmd);
+
+ *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
+ *cmd++ = lower_32_bits(i915_vma_offset(vma));
+ *cmd++ = upper_32_bits(i915_vma_offset(vma));
+ *cmd = MI_BATCH_BUFFER_END;
+
+ __i915_gem_object_flush_map(rpcs, 0, 64);
+ i915_gem_object_unpin_map(rpcs);
+
+ intel_gt_chipset_flush(vma->vm->gt);
+
+ return 0;
+}
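+/*
+ * The batch built above is a single MI_STORE_REGISTER_MEM that captures the
+ * engine's R_PWR_CLK_STATE register into the first dword of the result
+ * buffer; __read_slice_count() below decodes the slice-count field from that
+ * dword.
+ */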
+
+static int
+emit_rpcs_query(struct drm_i915_gem_object *obj,
+ struct intel_context *ce,
+ struct i915_request **rq_out)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_request *rq;
+ struct i915_gem_ww_ctx ww;
+ struct i915_vma *batch;
+ struct i915_vma *vma;
+ struct drm_i915_gem_object *rpcs;
+ int err;
+
+ GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
+
+ if (GRAPHICS_VER(i915) < 8)
+ return -EINVAL;
+
+ vma = i915_vma_instance(obj, ce->vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(rpcs))
+ return PTR_ERR(rpcs);
+
+ batch = i915_vma_instance(rpcs, ce->vm, NULL);
+ if (IS_ERR(batch)) {
+ err = PTR_ERR(batch);
+ goto err_put;
+ }
+
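+ /*
+ * Take both object locks under one ww acquire context: if any lock or
+ * pin below fails with -EDEADLK, the error path backs off (dropping the
+ * locks already held) and restarts from the retry label.
+ */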
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_gem_object_lock(rpcs, &ww);
+ if (!err)
+ err = i915_gem_object_set_to_gtt_domain(obj, false);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto err_put;
+
+ err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto err_vma;
+
+ err = rpcs_query_batch(rpcs, vma, ce->engine);
+ if (err)
+ goto err_batch;
+
+ rq = i915_request_create(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_batch;
+ }
+
+ err = i915_vma_move_to_active(batch, rq, 0);
+ if (err)
+ goto skip_request;
+
+ err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto skip_request;
+
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = rq->engine->emit_bb_start(rq,
+ i915_vma_offset(batch),
+ i915_vma_size(batch),
+ 0);
+ if (err)
+ goto skip_request;
+
+ *rq_out = i915_request_get(rq);
+
+skip_request:
+ if (err)
+ i915_request_set_error_once(rq, err);
+ i915_request_add(rq);
+err_batch:
+ i915_vma_unpin(batch);
+err_vma:
+ i915_vma_unpin(vma);
+err_put:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ i915_gem_object_put(rpcs);
+ return err;
+}
+
+#define TEST_IDLE BIT(0)
+#define TEST_BUSY BIT(1)
+#define TEST_RESET BIT(2)
+
+static int
+__sseu_prepare(const char *name,
+ unsigned int flags,
+ struct intel_context *ce,
+ struct igt_spinner **spin)
+{
+ struct i915_request *rq;
+ int ret;
+
+ *spin = NULL;
+ if (!(flags & (TEST_BUSY | TEST_RESET)))
+ return 0;
+
+ *spin = kzalloc(sizeof(**spin), GFP_KERNEL);
+ if (!*spin)
+ return -ENOMEM;
+
+ ret = igt_spinner_init(*spin, ce->engine->gt);
+ if (ret)
+ goto err_free;
+
+ rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ goto err_fini;
+ }
+
+ i915_request_add(rq);
+
+ if (!igt_wait_for_spinner(*spin, rq)) {
+ pr_err("%s: Spinner failed to start!\n", name);
+ ret = -ETIMEDOUT;
+ goto err_end;
+ }
+
+ return 0;
+
+err_end:
+ igt_spinner_end(*spin);
+err_fini:
+ igt_spinner_fini(*spin);
+err_free:
+ kfree(fetch_and_zero(spin));
+ return ret;
+}
+
+static int
+__read_slice_count(struct intel_context *ce,
+ struct drm_i915_gem_object *obj,
+ struct igt_spinner *spin,
+ u32 *rpcs)
+{
+ struct i915_request *rq = NULL;
+ u32 s_mask, s_shift;
+ unsigned int cnt;
+ u32 *buf, val;
+ long ret;
+
+ ret = emit_rpcs_query(obj, ce, &rq);
+ if (ret)
+ return ret;
+
+ if (spin)
+ igt_spinner_end(spin);
+
+ ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
+ i915_request_put(rq);
+ if (ret < 0)
+ return ret;
+
+ buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ return ret;
+ }
+
+ if (GRAPHICS_VER(ce->engine->i915) >= 11) {
+ s_mask = GEN11_RPCS_S_CNT_MASK;
+ s_shift = GEN11_RPCS_S_CNT_SHIFT;
+ } else {
+ s_mask = GEN8_RPCS_S_CNT_MASK;
+ s_shift = GEN8_RPCS_S_CNT_SHIFT;
+ }
+
+ val = *buf;
+ cnt = (val & s_mask) >> s_shift;
+ *rpcs = val;
+
+ i915_gem_object_unpin_map(obj);
+
+ return cnt;
+}
+
+static int
+__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
+ const char *prefix, const char *suffix)
+{
+ if (slices == expected)
+ return 0;
+
+ if (slices < 0) {
+ pr_err("%s: %s read slice count failed with %d%s\n",
+ name, prefix, slices, suffix);
+ return slices;
+ }
+
+ pr_err("%s: %s slice count %d is not %u%s\n",
+ name, prefix, slices, expected, suffix);
+
+ pr_info("RPCS=0x%x; %u%sx%u%s\n",
+ rpcs, slices,
+ (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
+ (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
+ (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
+
+ return -EINVAL;
+}
+
+static int
+__sseu_finish(const char *name,
+ unsigned int flags,
+ struct intel_context *ce,
+ struct drm_i915_gem_object *obj,
+ unsigned int expected,
+ struct igt_spinner *spin)
+{
+ unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
+ u32 rpcs = 0;
+ int ret = 0;
+
+ if (flags & TEST_RESET) {
+ ret = intel_engine_reset(ce->engine, "sseu");
+ if (ret)
+ goto out;
+ }
+
+ ret = __read_slice_count(ce, obj,
+ flags & TEST_RESET ? NULL : spin, &rpcs);
+ ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
+ if (ret)
+ goto out;
+
+ ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
+ ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
+
+out:
+ if (spin)
+ igt_spinner_end(spin);
+
+ if ((flags & TEST_IDLE) && ret == 0) {
+ ret = igt_flush_test(ce->engine->i915);
+ if (ret)
+ return ret;
+
+ ret = __read_slice_count(ce, obj, NULL, &rpcs);
+ ret = __check_rpcs(name, rpcs, ret, expected,
+ "Context", " after idle!");
+ }
+
+ return ret;
+}
+
+static int
+__sseu_test(const char *name,
+ unsigned int flags,
+ struct intel_context *ce,
+ struct drm_i915_gem_object *obj,
+ struct intel_sseu sseu)
+{
+ struct igt_spinner *spin = NULL;
+ int ret;
+
+ intel_engine_pm_get(ce->engine);
+
+ ret = __sseu_prepare(name, flags, ce, &spin);
+ if (ret)
+ goto out_pm;
+
+ ret = intel_context_reconfigure_sseu(ce, sseu);
+ if (ret)
+ goto out_spin;
+
+ ret = __sseu_finish(name, flags, ce, obj,
+ hweight32(sseu.slice_mask), spin);
+
+out_spin:
+ if (spin) {
+ igt_spinner_end(spin);
+ igt_spinner_fini(spin);
+ kfree(spin);
+ }
+out_pm:
+ intel_engine_pm_put(ce->engine);
+ return ret;
+}
+
+static int
+__igt_ctx_sseu(struct drm_i915_private *i915,
+ const char *name,
+ unsigned int flags)
+{
+ struct drm_i915_gem_object *obj;
+ int inst = 0;
+ int ret = 0;
+
+ if (GRAPHICS_VER(i915) < 9)
+ return 0;
+
+ if (flags & TEST_RESET)
+ igt_global_reset_lock(to_gt(i915));
+
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
+ goto out_unlock;
+ }
+
+ do {
+ struct intel_engine_cs *engine;
+ struct intel_context *ce;
+ struct intel_sseu pg_sseu;
+
+ engine = intel_engine_lookup_user(i915,
+ I915_ENGINE_CLASS_RENDER,
+ inst++);
+ if (!engine)
+ break;
+
+ if (hweight32(engine->sseu.slice_mask) < 2)
+ continue;
+
+ if (!engine->gt->info.sseu.has_slice_pg)
+ continue;
+
+ /*
+ * Gen11 VME-friendly power-gated configuration with
+ * half of the sub-slices enabled.
+ */
+ pg_sseu = engine->sseu;
+ pg_sseu.slice_mask = 1;
+ pg_sseu.subslice_mask =
+ ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
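+ /*
+ * e.g. with 8 subslices in the mask, the expression above yields 0xf,
+ * i.e. the bottom half of the subslices on a single slice.
+ */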
+
+ pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
+ engine->name, name, flags,
+ hweight32(engine->sseu.slice_mask),
+ hweight32(pg_sseu.slice_mask));
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ ret = PTR_ERR(ce);
+ goto out_put;
+ }
+
+ ret = intel_context_pin(ce);
+ if (ret)
+ goto out_ce;
+
+ /* First set the default mask. */
+ ret = __sseu_test(name, flags, ce, obj, engine->sseu);
+ if (ret)
+ goto out_unpin;
+
+ /* Then set a power-gated configuration. */
+ ret = __sseu_test(name, flags, ce, obj, pg_sseu);
+ if (ret)
+ goto out_unpin;
+
+ /* Back to defaults. */
+ ret = __sseu_test(name, flags, ce, obj, engine->sseu);
+ if (ret)
+ goto out_unpin;
+
+ /* One last power-gated configuration for the road. */
+ ret = __sseu_test(name, flags, ce, obj, pg_sseu);
+ if (ret)
+ goto out_unpin;
+
+out_unpin:
+ intel_context_unpin(ce);
+out_ce:
+ intel_context_put(ce);
+ } while (!ret);
+
+ if (igt_flush_test(i915))
+ ret = -EIO;
+
+out_put:
+ i915_gem_object_put(obj);
+
+out_unlock:
+ if (flags & TEST_RESET)
+ igt_global_reset_unlock(to_gt(i915));
+
+ if (ret)
+ pr_err("%s: Failed with %d!\n", name, ret);
+
+ return ret;
+}
+
+static int igt_ctx_sseu(void *arg)
+{
+ struct {
+ const char *name;
+ unsigned int flags;
+ } *phase, phases[] = {
+ { .name = "basic", .flags = 0 },
+ { .name = "idle", .flags = TEST_IDLE },
+ { .name = "busy", .flags = TEST_BUSY },
+ { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
+ { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
+ { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
+ };
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
+ i++, phase++)
+ ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
+
+ return ret;
+}
+
+static int igt_ctx_readonly(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ unsigned long idx, ndwords, dw, num_engines;
+ struct drm_i915_gem_object *obj = NULL;
+ struct i915_request *tq[5] = {};
+ struct i915_gem_engines_iter it;
+ struct i915_address_space *vm;
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ struct igt_live_test t;
+ I915_RND_STATE(prng);
+ IGT_TIMEOUT(end_time);
+ LIST_HEAD(objects);
+ struct file *file;
+ int err = -ENODEV;
+
+ /*
+ * Create a few read-only objects (with the occasional writable object)
+ * and try to write into these objects, checking that the GPU discards
+ * any write to a read-only object.
+ */
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ err = igt_live_test_begin(&t, i915, __func__, "");
+ if (err)
+ goto out_file;
+
+ ctx = live_context(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out_file;
+ }
+
+ vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
+ if (!vm || !vm->has_read_only) {
+ err = 0;
+ goto out_file;
+ }
+
+ num_engines = 0;
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
+ if (intel_engine_can_store_dword(ce->engine))
+ num_engines++;
+ i915_gem_context_unlock_engines(ctx);
+
+ ndwords = 0;
+ dw = 0;
+ while (!time_after(jiffies, end_time)) {
+ for_each_gem_engine(ce,
+ i915_gem_context_lock_engines(ctx), it) {
+ if (!intel_engine_can_store_dword(ce->engine))
+ continue;
+
+ if (!obj) {
+ obj = create_test_object(ce->vm, file, &objects);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ i915_gem_context_unlock_engines(ctx);
+ goto out_file;
+ }
+
+ if (prandom_u32_state(&prng) & 1)
+ i915_gem_object_set_readonly(obj);
+ }
+
+ err = gpu_fill(ce, obj, dw);
+ if (err) {
+ pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
+ ndwords, dw, max_dwords(obj),
+ ce->engine->name,
+ str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
+ err);
+ i915_gem_context_unlock_engines(ctx);
+ goto out_file;
+ }
+
+ err = throttle(ce, tq, ARRAY_SIZE(tq));
+ if (err) {
+ i915_gem_context_unlock_engines(ctx);
+ goto out_file;
+ }
+
+ if (++dw == max_dwords(obj)) {
+ obj = NULL;
+ dw = 0;
+ }
+ ndwords++;
+ }
+ i915_gem_context_unlock_engines(ctx);
+ }
+ pr_info("Submitted %lu dwords (across %lu engines)\n",
+ ndwords, num_engines);
+
+ dw = 0;
+ idx = 0;
+ list_for_each_entry(obj, &objects, st_link) {
+ unsigned int rem =
+ min_t(unsigned int, ndwords - dw, max_dwords(obj));
+ unsigned int num_writes;
+
+ num_writes = rem;
+ if (i915_gem_object_is_readonly(obj))
+ num_writes = 0;
+
+ err = cpu_check(obj, idx++, num_writes);
+ if (err)
+ break;
+
+ dw += rem;
+ }
+
+out_file:
+ throttle_release(tq, ARRAY_SIZE(tq));
+ if (igt_live_test_end(&t))
+ err = -EIO;
+
+ fput(file);
+ return err;
+}
+
+static int check_scratch(struct i915_address_space *vm, u64 offset)
+{
+ struct drm_mm_node *node;
+
+ mutex_lock(&vm->mutex);
+ node = __drm_mm_interval_first(&vm->mm,
+ offset, offset + sizeof(u32) - 1);
+ mutex_unlock(&vm->mutex);
+ if (!node || node->start > offset)
+ return 0;
+
+ GEM_BUG_ON(offset >= node->start + node->size);
+
+ pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
+ upper_32_bits(offset), lower_32_bits(offset));
+ return -EINVAL;
+}
+
+static int write_to_scratch(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct drm_i915_gem_object *obj,
+ u64 offset, u32 value)
+{
+ struct drm_i915_private *i915 = ctx->i915;
+ struct i915_address_space *vm;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ u32 *cmd;
+ int err;
+
+ GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
+
+ err = check_scratch(ctx->vm, offset);
+ if (err)
+ return err;
+
+ cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ if (IS_ERR(cmd))
+ return PTR_ERR(cmd);
+
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4;
+ if (GRAPHICS_VER(i915) >= 8) {
+ *cmd++ = lower_32_bits(offset);
+ *cmd++ = upper_32_bits(offset);
+ } else {
+ *cmd++ = 0;
+ *cmd++ = offset;
+ }
+ *cmd++ = value;
+ *cmd = MI_BATCH_BUFFER_END;
+ __i915_gem_object_flush_map(obj, 0, 64);
+ i915_gem_object_unpin_map(obj);
+
+ intel_gt_chipset_flush(engine->gt);
+
+ vm = i915_gem_context_get_eb_vm(ctx);
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_vm;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
+ if (err)
+ goto out_vm;
+
+ rq = igt_request_alloc(ctx, engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ err = igt_vma_move_to_active_unlocked(vma, rq, 0);
+ if (err)
+ goto skip_request;
+
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = engine->emit_bb_start(rq, i915_vma_offset(vma),
+ i915_vma_size(vma), 0);
+ if (err)
+ goto skip_request;
+
+ i915_vma_unpin(vma);
+
+ i915_request_add(rq);
+
+ goto out_vm;
+skip_request:
+ i915_request_set_error_once(rq, err);
+ i915_request_add(rq);
+err_unpin:
+ i915_vma_unpin(vma);
+out_vm:
+ i915_vm_put(vm);
+
+ if (!err)
+ err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+
+ return err;
+}
+
+static int read_from_scratch(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine,
+ struct drm_i915_gem_object *obj,
+ u64 offset, u32 *value)
+{
+ struct drm_i915_private *i915 = ctx->i915;
+ struct i915_address_space *vm;
+ const u32 result = 0x100;
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ unsigned int flags;
+ u32 *cmd;
+ int err;
+
+ GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
+
+ err = check_scratch(ctx->vm, offset);
+ if (err)
+ return err;
+
+ if (GRAPHICS_VER(i915) >= 8) {
+ const u32 GPR0 = engine->mmio_base + 0x600;
+
+ vm = i915_gem_context_get_eb_vm(ctx);
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_vm;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
+ if (err)
+ goto out_vm;
+
+ cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto err_unpin;
+ }
+
+ memset(cmd, POISON_INUSE, PAGE_SIZE);
+ *cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
+ *cmd++ = GPR0;
+ *cmd++ = lower_32_bits(offset);
+ *cmd++ = upper_32_bits(offset);
+ *cmd++ = MI_STORE_REGISTER_MEM_GEN8;
+ *cmd++ = GPR0;
+ *cmd++ = result;
+ *cmd++ = 0;
+ *cmd = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ flags = 0;
+ } else {
+ const u32 reg = engine->mmio_base + 0x420;
+
+ /* hsw: register access, even to 3DPRIM, is protected */
+ vm = i915_vm_get(&engine->gt->ggtt->vm);
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_vm;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
+ if (err)
+ goto out_vm;
+
+ cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto err_unpin;
+ }
+
+ memset(cmd, POISON_INUSE, PAGE_SIZE);
+ *cmd++ = MI_LOAD_REGISTER_MEM;
+ *cmd++ = reg;
+ *cmd++ = offset;
+ *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
+ *cmd++ = reg;
+ *cmd++ = i915_vma_offset(vma) + result;
+ *cmd = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ flags = I915_DISPATCH_SECURE;
+ }
+
+ intel_gt_chipset_flush(engine->gt);
+
+ rq = igt_request_alloc(ctx, engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto skip_request;
+
+ if (rq->engine->emit_init_breadcrumb) {
+ err = rq->engine->emit_init_breadcrumb(rq);
+ if (err)
+ goto skip_request;
+ }
+
+ err = engine->emit_bb_start(rq, i915_vma_offset(vma),
+ i915_vma_size(vma), flags);
+ if (err)
+ goto skip_request;
+
+ i915_vma_unpin(vma);
+
+ i915_request_add(rq);
+
+ i915_gem_object_lock(obj, NULL);
+ err = i915_gem_object_set_to_cpu_domain(obj, false);
+ i915_gem_object_unlock(obj);
+ if (err)
+ goto out_vm;
+
+ cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto out_vm;
+ }
+
+ *value = cmd[result / sizeof(*cmd)];
+ i915_gem_object_unpin_map(obj);
+
+ goto out_vm;
+skip_request:
+ i915_request_set_error_once(rq, err);
+ i915_request_add(rq);
+err_unpin:
+ i915_vma_unpin(vma);
+out_vm:
+ i915_vm_put(vm);
+
+ if (!err)
+ err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
+
+ return err;
+}
+
+static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
+{
+ struct i915_address_space *vm;
+ u32 *vaddr;
+ int err = 0;
+
+ vm = ctx->vm;
+ if (!vm)
+ return -ENODEV;
+
+ if (!vm->scratch[0]) {
+ pr_err("No scratch page!\n");
+ return -EINVAL;
+ }
+
+ vaddr = __px_vaddr(vm->scratch[0]);
+
+ memcpy(out, vaddr, sizeof(*out));
+ if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
+ pr_err("Inconsistent initial state of scratch page!\n");
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+static int igt_vm_isolation(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct i915_gem_context *ctx_a, *ctx_b;
+ struct drm_i915_gem_object *obj_a, *obj_b;
+ unsigned long num_engines, count;
+ struct intel_engine_cs *engine;
+ struct igt_live_test t;
+ I915_RND_STATE(prng);
+ struct file *file;
+ u64 vm_total;
+ u32 expected;
+ int err;
+
+ if (GRAPHICS_VER(i915) < 7)
+ return 0;
+
+ /*
+ * The simple goal here is that a write into one context is not
+ * observed in a second (separate page tables and scratch).
+ */
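+ /*
+ * Concretely: ctx_a writes 0xdeadbeef at a random offset in its own
+ * ppGTT and ctx_b then reads the same offset back. With working
+ * isolation the read lands in ctx_b's scratch page, so we expect the
+ * pristine scratch value sampled below, never the 0xdeadbeef.
+ */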
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ err = igt_live_test_begin(&t, i915, __func__, "");
+ if (err)
+ goto out_file;
+
+ ctx_a = live_context(i915, file);
+ if (IS_ERR(ctx_a)) {
+ err = PTR_ERR(ctx_a);
+ goto out_file;
+ }
+
+ ctx_b = live_context(i915, file);
+ if (IS_ERR(ctx_b)) {
+ err = PTR_ERR(ctx_b);
+ goto out_file;
+ }
+
+ /* We can only test VM isolation if the VMs are distinct */
+ if (ctx_a->vm == ctx_b->vm)
+ goto out_file;
+
+ /* Read the initial state of the scratch page */
+ err = check_scratch_page(ctx_a, &expected);
+ if (err)
+ goto out_file;
+
+ err = check_scratch_page(ctx_b, &expected);
+ if (err)
+ goto out_file;
+
+ vm_total = ctx_a->vm->total;
+ GEM_BUG_ON(ctx_b->vm->total != vm_total);
+
+ obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj_a)) {
+ err = PTR_ERR(obj_a);
+ goto out_file;
+ }
+
+ obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj_b)) {
+ err = PTR_ERR(obj_b);
+ goto put_a;
+ }
+
+ count = 0;
+ num_engines = 0;
+ for_each_uabi_engine(engine, i915) {
+ IGT_TIMEOUT(end_time);
+ unsigned long this = 0;
+
+ if (!intel_engine_can_store_dword(engine))
+ continue;
+
+ /* Not all engines have their own GPR! */
+ if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
+ continue;
+
+ while (!__igt_timeout(end_time, NULL)) {
+ u32 value = 0xc5c5c5c5;
+ u64 offset;
+
+ /* Leave enough space at offset 0 for the batch */
+ offset = igt_random_offset(&prng,
+ I915_GTT_PAGE_SIZE, vm_total,
+ sizeof(u32), alignof_dword);
+
+ err = write_to_scratch(ctx_a, engine, obj_a,
+ offset, 0xdeadbeef);
+ if (err == 0)
+ err = read_from_scratch(ctx_b, engine, obj_b,
+ offset, &value);
+ if (err)
+ goto put_b;
+
+ if (value != expected) {
+ pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
+ engine->name, value,
+ upper_32_bits(offset),
+ lower_32_bits(offset),
+ this);
+ err = -EINVAL;
+ goto put_b;
+ }
+
+ this++;
+ }
+ count += this;
+ num_engines++;
+ }
+ pr_info("Checked %lu scratch offsets across %lu engines\n",
+ count, num_engines);
+
+put_b:
+ i915_gem_object_put(obj_b);
+put_a:
+ i915_gem_object_put(obj_a);
+out_file:
+ if (igt_live_test_end(&t))
+ err = -EIO;
+ fput(file);
+ return err;
+}
+
+int i915_gem_context_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(live_nop_switch),
+ SUBTEST(live_parallel_switch),
+ SUBTEST(igt_ctx_exec),
+ SUBTEST(igt_ctx_readonly),
+ SUBTEST(igt_ctx_sseu),
+ SUBTEST(igt_shared_ctx_exec),
+ SUBTEST(igt_vm_isolation),
+ };
+
+ if (intel_gt_is_wedged(to_gt(i915)))
+ return 0;
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
new file mode 100644
index 0000000000..e57f939007
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -0,0 +1,557 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "i915_selftest.h"
+#include "gem/i915_gem_context.h"
+
+#include "mock_context.h"
+#include "mock_dmabuf.h"
+#include "igt_gem_utils.h"
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gem_device.h"
+
+static int igt_dmabuf_export(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ struct dma_buf *dmabuf;
+
+ obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
+ i915_gem_object_put(obj);
+ if (IS_ERR(dmabuf)) {
+ pr_err("i915_gem_prime_export failed with err=%d\n",
+ (int)PTR_ERR(dmabuf));
+ return PTR_ERR(dmabuf);
+ }
+
+ dma_buf_put(dmabuf);
+ return 0;
+}
+
+static int igt_dmabuf_import_self(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj, *import_obj;
+ struct drm_gem_object *import;
+ struct dma_buf *dmabuf;
+ int err;
+
+ obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
+ if (IS_ERR(dmabuf)) {
+ pr_err("i915_gem_prime_export failed with err=%d\n",
+ (int)PTR_ERR(dmabuf));
+ err = PTR_ERR(dmabuf);
+ goto out;
+ }
+
+ import = i915_gem_prime_import(&i915->drm, dmabuf);
+ if (IS_ERR(import)) {
+ pr_err("i915_gem_prime_import failed with err=%d\n",
+ (int)PTR_ERR(import));
+ err = PTR_ERR(import);
+ goto out_dmabuf;
+ }
+ import_obj = to_intel_bo(import);
+
+ if (import != &obj->base) {
+ pr_err("i915_gem_prime_import created a new object!\n");
+ err = -EINVAL;
+ goto out_import;
+ }
+
+ i915_gem_object_lock(import_obj, NULL);
+ err = __i915_gem_object_get_pages(import_obj);
+ i915_gem_object_unlock(import_obj);
+ if (err) {
+ pr_err("Same object dma-buf get_pages failed!\n");
+ goto out_import;
+ }
+
+ err = 0;
+out_import:
+ i915_gem_object_put(import_obj);
+out_dmabuf:
+ dma_buf_put(dmabuf);
+out:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+static int igt_dmabuf_import_same_driver_lmem(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *lmem = i915->mm.regions[INTEL_REGION_LMEM_0];
+ struct drm_i915_gem_object *obj;
+ struct drm_gem_object *import;
+ struct dma_buf *dmabuf;
+ int err;
+
+ if (!lmem)
+ return 0;
+
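+ /*
+ * force_different_devices appears to be a selftest-only knob that makes
+ * i915_gem_prime_import() treat the exporter as a foreign device, so
+ * the full dma-buf attach path is exercised instead of the same-device
+ * shortcut of reusing the original object.
+ */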
+ force_different_devices = true;
+
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &lmem, 1);
+ if (IS_ERR(obj)) {
+ pr_err("__i915_gem_object_create_user failed with err=%ld\n",
+ PTR_ERR(obj));
+ err = PTR_ERR(obj);
+ goto out_ret;
+ }
+
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
+ if (IS_ERR(dmabuf)) {
+ pr_err("i915_gem_prime_export failed with err=%ld\n",
+ PTR_ERR(dmabuf));
+ err = PTR_ERR(dmabuf);
+ goto out;
+ }
+
+ /*
+ * We expect an import of an LMEM-only object to fail with
+ * -EOPNOTSUPP because it can't be migrated to SMEM.
+ */
+ import = i915_gem_prime_import(&i915->drm, dmabuf);
+ if (!IS_ERR(import)) {
+ drm_gem_object_put(import);
+ pr_err("i915_gem_prime_import succeeded when it shouldn't have\n");
+ err = -EINVAL;
+ } else if (PTR_ERR(import) != -EOPNOTSUPP) {
+ pr_err("i915_gem_prime_import failed with the wrong err=%ld\n",
+ PTR_ERR(import));
+ err = PTR_ERR(import);
+ } else {
+ err = 0;
+ }
+
+ dma_buf_put(dmabuf);
+out:
+ i915_gem_object_put(obj);
+out_ret:
+ force_different_devices = false;
+ return err;
+}
+
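+/*
+ * verify_access() fills the imported object from the GPU and then reads the
+ * result back through a CPU map of the native (exporting) object, which can
+ * only succeed if both objects really share the same backing pages.
+ */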
+static int verify_access(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *native_obj,
+ struct drm_i915_gem_object *import_obj)
+{
+ struct i915_gem_engines_iter it;
+ struct i915_gem_context *ctx;
+ struct intel_context *ce;
+ struct i915_vma *vma;
+ struct file *file;
+ u32 *vaddr;
+ int err = 0, i;
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = live_context(i915, file);
+ if (IS_ERR(ctx)) {
+ err = PTR_ERR(ctx);
+ goto out_file;
+ }
+
+ for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+ if (intel_engine_can_store_dword(ce->engine))
+ break;
+ }
+ i915_gem_context_unlock_engines(ctx);
+ if (!ce)
+ goto out_file;
+
+ vma = i915_vma_instance(import_obj, ce->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_file;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto out_file;
+
+ err = igt_gpu_fill_dw(ce, vma, 0,
+ vma->size >> PAGE_SHIFT, 0xdeadbeaf);
+ i915_vma_unpin(vma);
+ if (err)
+ goto out_file;
+
+ err = i915_gem_object_wait(import_obj, 0, MAX_SCHEDULE_TIMEOUT);
+ if (err)
+ goto out_file;
+
+ vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
+ if (IS_ERR(vaddr)) {
+ err = PTR_ERR(vaddr);
+ goto out_file;
+ }
+
+ for (i = 0; i < native_obj->base.size / sizeof(u32); i += PAGE_SIZE / sizeof(u32)) {
+ if (vaddr[i] != 0xdeadbeaf) {
+ pr_err("Data mismatch [%d]=%u\n", i, vaddr[i]);
+ err = -EINVAL;
+ goto out_file;
+ }
+ }
+
+out_file:
+ fput(file);
+ return err;
+}
+
+static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
+ struct intel_memory_region **regions,
+ unsigned int num_regions)
+{
+ struct drm_i915_gem_object *obj, *import_obj;
+ struct drm_gem_object *import;
+ struct dma_buf *dmabuf;
+ struct dma_buf_attachment *import_attach;
+ struct sg_table *st;
+ long timeout;
+ int err;
+
+ force_different_devices = true;
+
+ obj = __i915_gem_object_create_user(i915, SZ_8M,
+ regions, num_regions);
+ if (IS_ERR(obj)) {
+ pr_err("__i915_gem_object_create_user failed with err=%ld\n",
+ PTR_ERR(obj));
+ err = PTR_ERR(obj);
+ goto out_ret;
+ }
+
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
+ if (IS_ERR(dmabuf)) {
+ pr_err("i915_gem_prime_export failed with err=%ld\n",
+ PTR_ERR(dmabuf));
+ err = PTR_ERR(dmabuf);
+ goto out;
+ }
+
+ import = i915_gem_prime_import(&i915->drm, dmabuf);
+ if (IS_ERR(import)) {
+ pr_err("i915_gem_prime_import failed with err=%ld\n",
+ PTR_ERR(import));
+ err = PTR_ERR(import);
+ goto out_dmabuf;
+ }
+ import_obj = to_intel_bo(import);
+
+ if (import == &obj->base) {
+ pr_err("i915_gem_prime_import reused gem object!\n");
+ err = -EINVAL;
+ goto out_import;
+ }
+
+ i915_gem_object_lock(import_obj, NULL);
+ err = __i915_gem_object_get_pages(import_obj);
+ if (err) {
+ pr_err("Different objects dma-buf get_pages failed!\n");
+ i915_gem_object_unlock(import_obj);
+ goto out_import;
+ }
+
+ /*
+ * If the exported object is not in system memory, something
+ * weird is going on. TODO: When p2p is supported, this is no
+ * longer considered weird.
+ */
+ if (obj->mm.region != i915->mm.regions[INTEL_REGION_SMEM]) {
+ pr_err("Exported dma-buf is not in system memory\n");
+ err = -EINVAL;
+ }
+
+ i915_gem_object_unlock(import_obj);
+
+ err = verify_access(i915, obj, import_obj);
+ if (err)
+ goto out_import;
+
+ /* Now try to fake an importer */
+ import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev);
+ if (IS_ERR(import_attach)) {
+ err = PTR_ERR(import_attach);
+ goto out_import;
+ }
+
+ st = dma_buf_map_attachment_unlocked(import_attach, DMA_BIDIRECTIONAL);
+ if (IS_ERR(st)) {
+ err = PTR_ERR(st);
+ goto out_detach;
+ }
+
+ timeout = dma_resv_wait_timeout(dmabuf->resv, DMA_RESV_USAGE_WRITE,
+ true, 5 * HZ);
+ if (!timeout) {
+ pr_err("dmabuf wait for exclusive fence timed out.\n");
+ timeout = -ETIME;
+ }
+ err = timeout > 0 ? 0 : timeout;
+ dma_buf_unmap_attachment_unlocked(import_attach, st, DMA_BIDIRECTIONAL);
+out_detach:
+ dma_buf_detach(dmabuf, import_attach);
+out_import:
+ i915_gem_object_put(import_obj);
+out_dmabuf:
+ dma_buf_put(dmabuf);
+out:
+ i915_gem_object_put(obj);
+out_ret:
+ force_different_devices = false;
+ return err;
+}
+
+static int igt_dmabuf_import_same_driver_smem(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *smem = i915->mm.regions[INTEL_REGION_SMEM];
+
+ return igt_dmabuf_import_same_driver(i915, &smem, 1);
+}
+
+static int igt_dmabuf_import_same_driver_lmem_smem(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *regions[2];
+
+ if (!i915->mm.regions[INTEL_REGION_LMEM_0])
+ return 0;
+
+ regions[0] = i915->mm.regions[INTEL_REGION_LMEM_0];
+ regions[1] = i915->mm.regions[INTEL_REGION_SMEM];
+ return igt_dmabuf_import_same_driver(i915, regions, 2);
+}
+
+static int igt_dmabuf_import(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ struct dma_buf *dmabuf;
+ void *obj_map, *dma_map;
+ struct iosys_map map;
+ u32 pattern[] = { 0, 0xaa, 0xcc, 0x55, 0xff };
+ int err, i;
+
+ dmabuf = mock_dmabuf(1);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+
+ obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf));
+ if (IS_ERR(obj)) {
+ pr_err("i915_gem_prime_import failed with err=%d\n",
+ (int)PTR_ERR(obj));
+ err = PTR_ERR(obj);
+ goto out_dmabuf;
+ }
+
+ if (obj->base.dev != &i915->drm) {
+ pr_err("i915_gem_prime_import created a non-i915 object!\n");
+ err = -EINVAL;
+ goto out_obj;
+ }
+
+ if (obj->base.size != PAGE_SIZE) {
+ pr_err("i915_gem_prime_import returned the wrong size: found %lld, expected %ld\n",
+ (long long)obj->base.size, PAGE_SIZE);
+ err = -EINVAL;
+ goto out_obj;
+ }
+
+ err = dma_buf_vmap_unlocked(dmabuf, &map);
+ dma_map = err ? NULL : map.vaddr;
+ if (!dma_map) {
+ pr_err("dma_buf_vmap failed\n");
+ err = -ENOMEM;
+ goto out_obj;
+ }
+
+ if (0) { /* Cannot yet map the dmabuf */
+ obj_map = i915_gem_object_pin_map(obj, I915_MAP_WB);
+ if (IS_ERR(obj_map)) {
+ err = PTR_ERR(obj_map);
+ pr_err("i915_gem_object_pin_map failed with err=%d\n", err);
+ goto out_dma_map;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+ memset(dma_map, pattern[i], PAGE_SIZE);
+ if (memchr_inv(obj_map, pattern[i], PAGE_SIZE)) {
+ err = -EINVAL;
+ pr_err("imported vmap not all set to %x!\n", pattern[i]);
+ i915_gem_object_unpin_map(obj);
+ goto out_dma_map;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(pattern); i++) {
+ memset(obj_map, pattern[i], PAGE_SIZE);
+ if (memchr_inv(dma_map, pattern[i], PAGE_SIZE)) {
+ err = -EINVAL;
+ pr_err("exported vmap not all set to %x!\n", pattern[i]);
+ i915_gem_object_unpin_map(obj);
+ goto out_dma_map;
+ }
+ }
+
+ i915_gem_object_unpin_map(obj);
+ }
+
+ err = 0;
+out_dma_map:
+ dma_buf_vunmap_unlocked(dmabuf, &map);
+out_obj:
+ i915_gem_object_put(obj);
+out_dmabuf:
+ dma_buf_put(dmabuf);
+ return err;
+}
+
+static int igt_dmabuf_import_ownership(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ struct dma_buf *dmabuf;
+ struct iosys_map map;
+ void *ptr;
+ int err;
+
+ dmabuf = mock_dmabuf(1);
+ if (IS_ERR(dmabuf))
+ return PTR_ERR(dmabuf);
+
+ err = dma_buf_vmap_unlocked(dmabuf, &map);
+ ptr = err ? NULL : map.vaddr;
+ if (!ptr) {
+ pr_err("dma_buf_vmap failed\n");
+ err = -ENOMEM;
+ goto err_dmabuf;
+ }
+
+ memset(ptr, 0xc5, PAGE_SIZE);
+ dma_buf_vunmap_unlocked(dmabuf, &map);
+
+ obj = to_intel_bo(i915_gem_prime_import(&i915->drm, dmabuf));
+ if (IS_ERR(obj)) {
+ pr_err("i915_gem_prime_import failed with err=%d\n",
+ (int)PTR_ERR(obj));
+ err = PTR_ERR(obj);
+ goto err_dmabuf;
+ }
+
+ dma_buf_put(dmabuf);
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ pr_err("i915_gem_object_pin_pages failed with err=%d\n", err);
+ goto out_obj;
+ }
+
+ err = 0;
+ i915_gem_object_unpin_pages(obj);
+out_obj:
+ i915_gem_object_put(obj);
+ return err;
+
+err_dmabuf:
+ dma_buf_put(dmabuf);
+ return err;
+}
+
+static int igt_dmabuf_export_vmap(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ struct dma_buf *dmabuf;
+ struct iosys_map map;
+ void *ptr;
+ int err;
+
+ obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ dmabuf = i915_gem_prime_export(&obj->base, 0);
+ if (IS_ERR(dmabuf)) {
+ pr_err("i915_gem_prime_export failed with err=%d\n",
+ (int)PTR_ERR(dmabuf));
+ err = PTR_ERR(dmabuf);
+ goto err_obj;
+ }
+ i915_gem_object_put(obj);
+
+ err = dma_buf_vmap_unlocked(dmabuf, &map);
+ ptr = err ? NULL : map.vaddr;
+ if (!ptr) {
+ pr_err("dma_buf_vmap failed\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (memchr_inv(ptr, 0, dmabuf->size)) {
+ pr_err("Exported object not initialised to zero!\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ memset(ptr, 0xc5, dmabuf->size);
+
+ err = 0;
+ dma_buf_vunmap_unlocked(dmabuf, &map);
+out:
+ dma_buf_put(dmabuf);
+ return err;
+
+err_obj:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+int i915_gem_dmabuf_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_dmabuf_export),
+ SUBTEST(igt_dmabuf_import_self),
+ SUBTEST(igt_dmabuf_import),
+ SUBTEST(igt_dmabuf_import_ownership),
+ SUBTEST(igt_dmabuf_export_vmap),
+ };
+ struct drm_i915_private *i915;
+ int err;
+
+ i915 = mock_gem_device();
+ if (!i915)
+ return -ENOMEM;
+
+ err = i915_subtests(tests, i915);
+
+ mock_destroy_device(i915);
+ return err;
+}
+
+int i915_gem_dmabuf_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_dmabuf_export),
+ SUBTEST(igt_dmabuf_import_same_driver_lmem),
+ SUBTEST(igt_dmabuf_import_same_driver_smem),
+ SUBTEST(igt_dmabuf_import_same_driver_lmem_smem),
+ };
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
new file mode 100644
index 0000000000..d8f4a10d71
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_migrate.c
@@ -0,0 +1,528 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2020-2021 Intel Corporation
+ */
+
+#include "gt/intel_migrate.h"
+#include "gt/intel_gpu_commands.h"
+#include "gem/i915_gem_ttm_move.h"
+
+#include "i915_deps.h"
+
+#include "selftests/igt_reset.h"
+#include "selftests/igt_spinner.h"
+
+static int igt_fill_check_buffer(struct drm_i915_gem_object *obj,
+ struct intel_gt *gt,
+ bool fill)
+{
+ unsigned int i, count = obj->base.size / sizeof(u32);
+ enum i915_map_type map_type =
+ intel_gt_coherent_map_type(gt, obj, false);
+ u32 *cur;
+ int err = 0;
+
+ assert_object_held(obj);
+ cur = i915_gem_object_pin_map(obj, map_type);
+ if (IS_ERR(cur))
+ return PTR_ERR(cur);
+
+ if (fill)
+ for (i = 0; i < count; ++i)
+ *cur++ = i;
+ else
+ for (i = 0; i < count; ++i)
+ if (*cur++ != i) {
+ pr_err("Object content mismatch at location %d of %d\n", i, count);
+ err = -EINVAL;
+ break;
+ }
+
+ i915_gem_object_unpin_map(obj);
+
+ return err;
+}
+
+static int igt_create_migrate(struct intel_gt *gt, enum intel_region_id src,
+ enum intel_region_id dst)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct intel_memory_region *src_mr = i915->mm.regions[src];
+ struct intel_memory_region *dst_mr = i915->mm.regions[dst];
+ struct drm_i915_gem_object *obj;
+ struct i915_gem_ww_ctx ww;
+ int err = 0;
+
+ GEM_BUG_ON(!src_mr);
+ GEM_BUG_ON(!dst_mr);
+
+ /* Switch object backing-store on create */
+ obj = i915_gem_object_create_region(src_mr, dst_mr->min_page_size, 0, 0);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
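+ /*
+ * for_i915_gem_ww() essentially wraps the body in a ww transaction: on
+ * -EDEADLK the held locks are dropped and the whole block is retried,
+ * which is why each failure below simply continues with err set.
+ */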
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ err = igt_fill_check_buffer(obj, gt, true);
+ if (err)
+ continue;
+
+ err = i915_gem_object_migrate(obj, &ww, dst);
+ if (err)
+ continue;
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err)
+ continue;
+
+ if (i915_gem_object_can_migrate(obj, src))
+ err = -EINVAL;
+
+ i915_gem_object_unpin_pages(obj);
+ err = i915_gem_object_wait_migration(obj, true);
+ if (err)
+ continue;
+
+ err = igt_fill_check_buffer(obj, gt, false);
+ }
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static int igt_smem_create_migrate(void *arg)
+{
+ return igt_create_migrate(arg, INTEL_REGION_LMEM_0, INTEL_REGION_SMEM);
+}
+
+static int igt_lmem_create_migrate(void *arg)
+{
+ return igt_create_migrate(arg, INTEL_REGION_SMEM, INTEL_REGION_LMEM_0);
+}
+
+static int igt_same_create_migrate(void *arg)
+{
+ return igt_create_migrate(arg, INTEL_REGION_LMEM_0, INTEL_REGION_LMEM_0);
+}
+
+static int lmem_pages_migrate_one(struct i915_gem_ww_ctx *ww,
+ struct drm_i915_gem_object *obj,
+ struct i915_vma *vma,
+ bool silent_migrate)
+{
+ int err;
+
+ err = i915_gem_object_lock(obj, ww);
+ if (err)
+ return err;
+
+ if (vma) {
+ err = i915_vma_pin_ww(vma, ww, obj->base.size, 0,
+ 0UL | PIN_OFFSET_FIXED |
+ PIN_USER);
+ if (err) {
+ if (err != -EINTR && err != -ERESTARTSYS &&
+ err != -EDEADLK)
+ pr_err("Failed to pin vma.\n");
+ return err;
+ }
+
+ i915_vma_unpin(vma);
+ }
+
+ /*
+ * Migration will implicitly unbind (asynchronously) any bound
+ * vmas.
+ */
+ if (i915_gem_object_is_lmem(obj)) {
+ err = i915_gem_object_migrate(obj, ww, INTEL_REGION_SMEM);
+		if (err) {
+			if (!silent_migrate)
+				pr_err("Object failed migration to smem\n");
+			return err;
+		}
+
+ if (i915_gem_object_is_lmem(obj)) {
+ pr_err("object still backed by lmem\n");
+ err = -EINVAL;
+ }
+
+ if (!i915_gem_object_has_struct_page(obj)) {
+ pr_err("object not backed by struct page\n");
+ err = -EINVAL;
+ }
+
+ } else {
+ err = i915_gem_object_migrate(obj, ww, INTEL_REGION_LMEM_0);
+		if (err) {
+			if (!silent_migrate)
+				pr_err("Object failed migration to lmem\n");
+			return err;
+		}
+
+ if (i915_gem_object_has_struct_page(obj)) {
+ pr_err("object still backed by struct page\n");
+ err = -EINVAL;
+ }
+
+ if (!i915_gem_object_is_lmem(obj)) {
+ pr_err("object not backed by lmem\n");
+ err = -EINVAL;
+ }
+ }
+
+ return err;
+}
+
+static int __igt_lmem_pages_migrate(struct intel_gt *gt,
+ struct i915_address_space *vm,
+ struct i915_deps *deps,
+ struct igt_spinner *spin,
+ struct dma_fence *spin_fence,
+ bool borked_migrate)
+{
+ struct drm_i915_private *i915 = gt->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_vma *vma = NULL;
+ struct i915_gem_ww_ctx ww;
+ struct i915_request *rq;
+ int err;
+ int i;
+
+ /* From LMEM to shmem and back again */
+
+ obj = i915_gem_object_create_lmem(i915, SZ_2M, 0);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (vm) {
+ vma = i915_vma_instance(obj, vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_put;
+ }
+ }
+
+ /* Initial GPU fill, sync, CPU initialization. */
+ for_i915_gem_ww(&ww, err, true) {
+ err = i915_gem_object_lock(obj, &ww);
+ if (err)
+ continue;
+
+ err = ____i915_gem_object_get_pages(obj);
+ if (err)
+ continue;
+
+ err = intel_migrate_clear(&gt->migrate, &ww, deps,
+ obj->mm.pages->sgl, obj->pat_index,
+ i915_gem_object_is_lmem(obj),
+ 0xdeadbeaf, &rq);
+ if (rq) {
+ err = dma_resv_reserve_fences(obj->base.resv, 1);
+ if (!err)
+ dma_resv_add_fence(obj->base.resv, &rq->fence,
+ DMA_RESV_USAGE_KERNEL);
+ i915_request_put(rq);
+ }
+ if (err)
+ continue;
+
+ if (!vma) {
+ err = igt_fill_check_buffer(obj, gt, true);
+ if (err)
+ continue;
+ }
+ }
+ if (err)
+ goto out_put;
+
+ /*
+ * Migrate to and from smem without explicitly syncing.
+ * Finalize with data in smem for fast readout.
+ */
+ for (i = 1; i <= 5; ++i) {
+ for_i915_gem_ww(&ww, err, true)
+ err = lmem_pages_migrate_one(&ww, obj, vma,
+ borked_migrate);
+ if (err)
+ goto out_put;
+ }
+
+ err = i915_gem_object_lock_interruptible(obj, NULL);
+ if (err)
+ goto out_put;
+
+ if (spin) {
+ if (dma_fence_is_signaled(spin_fence)) {
+ pr_err("Spinner was terminated by hangcheck.\n");
+ err = -EBUSY;
+ goto out_unlock;
+ }
+ igt_spinner_end(spin);
+ }
+
+ /* Finally sync migration and check content. */
+ err = i915_gem_object_wait_migration(obj, true);
+ if (err)
+ goto out_unlock;
+
+ if (vma) {
+ err = i915_vma_wait_for_bind(vma);
+ if (err)
+ goto out_unlock;
+ } else {
+ err = igt_fill_check_buffer(obj, gt, false);
+ }
+
+out_unlock:
+ i915_gem_object_unlock(obj);
+out_put:
+ i915_gem_object_put(obj);
+
+ return err;
+}
+
+static int igt_lmem_pages_failsafe_migrate(void *arg)
+{
+ int fail_gpu, fail_alloc, ban_memcpy, ret;
+ struct intel_gt *gt = arg;
+
+ for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+ for (fail_alloc = 0; fail_alloc < 2; ++fail_alloc) {
+ for (ban_memcpy = 0; ban_memcpy < 2; ++ban_memcpy) {
+ pr_info("Simulated failure modes: gpu: %d, alloc:%d, ban_memcpy: %d\n",
+ fail_gpu, fail_alloc, ban_memcpy);
+ i915_ttm_migrate_set_ban_memcpy(ban_memcpy);
+ i915_ttm_migrate_set_failure_modes(fail_gpu,
+ fail_alloc);
+ ret = __igt_lmem_pages_migrate(gt, NULL, NULL,
+ NULL, NULL,
+ ban_memcpy &&
+ fail_gpu);
+
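+				/*
+				 * With the GPU copy failing and the memcpy
+				 * fallback banned there is no way to complete
+				 * the migration: expect -EIO, and a wedged GT
+				 * unless the simulated allocation failure hit
+				 * first. Reset any wedged GT before moving on.
+				 */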
+ if (ban_memcpy && fail_gpu) {
+ struct intel_gt *__gt;
+ unsigned int id;
+
+ if (ret != -EIO) {
+ pr_err("expected -EIO, got (%d)\n", ret);
+ ret = -EINVAL;
+ } else {
+ ret = 0;
+ }
+
+ for_each_gt(__gt, gt->i915, id) {
+ intel_wakeref_t wakeref;
+ bool wedged;
+
+ mutex_lock(&__gt->reset.mutex);
+ wedged = test_bit(I915_WEDGED, &__gt->reset.flags);
+ mutex_unlock(&__gt->reset.mutex);
+
+ if (fail_gpu && !fail_alloc) {
+ if (!wedged) {
+ pr_err("gt(%u) not wedged\n", id);
+ ret = -EINVAL;
+ continue;
+ }
+ } else if (wedged) {
+ pr_err("gt(%u) incorrectly wedged\n", id);
+ ret = -EINVAL;
+ } else {
+ continue;
+ }
+
+ wakeref = intel_runtime_pm_get(__gt->uncore->rpm);
+ igt_global_reset_lock(__gt);
+ intel_gt_reset(__gt, ALL_ENGINES, NULL);
+ igt_global_reset_unlock(__gt);
+ intel_runtime_pm_put(__gt->uncore->rpm, wakeref);
+ }
+ if (ret)
+ goto out_err;
+ }
+ }
+ }
+ }
+
+out_err:
+ i915_ttm_migrate_set_failure_modes(false, false);
+ i915_ttm_migrate_set_ban_memcpy(false);
+ return ret;
+}
+
+/*
+ * This subtest verifies that unbinding at migration is indeed performed
+ * asynchronously. We launch a spinner and a number of migrations that
+ * depend on the spinner having terminated. Before each migration we bind
+ * a vma, which should then be asynchronously unbound by the migration
+ * operation. If we are able to schedule migrations without blocking
+ * while the spinner is still running, those unbinds are indeed async
+ * and non-blocking.
+ *
+ * Note that each async bind operation is awaiting the previous migration
+ * due to the moving fence resulting from the migration.
+ */
+static int igt_async_migrate(struct intel_gt *gt)
+{
+ struct intel_engine_cs *engine;
+ enum intel_engine_id id;
+ struct i915_ppgtt *ppgtt;
+ struct igt_spinner spin;
+ int err;
+
+ ppgtt = i915_ppgtt_create(gt, 0);
+ if (IS_ERR(ppgtt))
+ return PTR_ERR(ppgtt);
+
+ if (igt_spinner_init(&spin, gt)) {
+ err = -ENOMEM;
+ goto out_spin;
+ }
+
+ for_each_engine(engine, gt, id) {
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true
+ };
+ struct dma_fence *spin_fence;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ struct i915_deps deps;
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ err = PTR_ERR(ce);
+ goto out_ce;
+ }
+
+ /*
+ * Use MI_NOOP, making the spinner non-preemptible. If there
+ * is a code path where we fail async operation due to the
+ * running spinner, we will block and fail to end the
+ * spinner resulting in a deadlock. But with a non-
+ * preemptible spinner, hangcheck will terminate the spinner
+ * for us, and we will later detect that and fail the test.
+ */
+ rq = igt_spinner_create_request(&spin, ce, MI_NOOP);
+ intel_context_put(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_ce;
+ }
+
+ i915_deps_init(&deps, GFP_KERNEL);
+ err = i915_deps_add_dependency(&deps, &rq->fence, &ctx);
+ spin_fence = dma_fence_get(&rq->fence);
+ i915_request_add(rq);
+ if (err)
+ goto out_ce;
+
+ err = __igt_lmem_pages_migrate(gt, &ppgtt->vm, &deps, &spin,
+ spin_fence, false);
+ i915_deps_fini(&deps);
+ dma_fence_put(spin_fence);
+ if (err)
+ goto out_ce;
+ }
+
+out_ce:
+ igt_spinner_fini(&spin);
+out_spin:
+ i915_vm_put(&ppgtt->vm);
+
+ return err;
+}
+
+/*
+ * Setting ASYNC_FAIL_ALLOC to 2 would simulate memory allocation failure
+ * while arming the migration error check and would block async migration.
+ * That would cause us to deadlock, and hangcheck would then terminate the
+ * spinner, causing the test to fail.
+ */
+#define ASYNC_FAIL_ALLOC 1
+static int igt_lmem_async_migrate(void *arg)
+{
+ int fail_gpu, fail_alloc, ban_memcpy, ret;
+ struct intel_gt *gt = arg;
+
+ for (fail_gpu = 0; fail_gpu < 2; ++fail_gpu) {
+ for (fail_alloc = 0; fail_alloc < ASYNC_FAIL_ALLOC; ++fail_alloc) {
+ for (ban_memcpy = 0; ban_memcpy < 2; ++ban_memcpy) {
+ pr_info("Simulated failure modes: gpu: %d, alloc: %d, ban_memcpy: %d\n",
+ fail_gpu, fail_alloc, ban_memcpy);
+ i915_ttm_migrate_set_ban_memcpy(ban_memcpy);
+ i915_ttm_migrate_set_failure_modes(fail_gpu,
+ fail_alloc);
+ ret = igt_async_migrate(gt);
+
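+				/*
+				 * As in the failsafe test: with memcpy banned
+				 * and the GPU copy failing we expect -EIO and
+				 * a wedged GT, which is reset below.
+				 */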
+ if (fail_gpu && ban_memcpy) {
+ struct intel_gt *__gt;
+ unsigned int id;
+
+ if (ret != -EIO) {
+ pr_err("expected -EIO, got (%d)\n", ret);
+ ret = -EINVAL;
+ } else {
+ ret = 0;
+ }
+
+ for_each_gt(__gt, gt->i915, id) {
+ intel_wakeref_t wakeref;
+ bool wedged;
+
+ mutex_lock(&__gt->reset.mutex);
+ wedged = test_bit(I915_WEDGED, &__gt->reset.flags);
+ mutex_unlock(&__gt->reset.mutex);
+
+ if (fail_gpu && !fail_alloc) {
+ if (!wedged) {
+ pr_err("gt(%u) not wedged\n", id);
+ ret = -EINVAL;
+ continue;
+ }
+ } else if (wedged) {
+ pr_err("gt(%u) incorrectly wedged\n", id);
+ ret = -EINVAL;
+ } else {
+ continue;
+ }
+
+ wakeref = intel_runtime_pm_get(__gt->uncore->rpm);
+ igt_global_reset_lock(__gt);
+ intel_gt_reset(__gt, ALL_ENGINES, NULL);
+ igt_global_reset_unlock(__gt);
+ intel_runtime_pm_put(__gt->uncore->rpm, wakeref);
+ }
+ }
+ if (ret)
+ goto out_err;
+ }
+ }
+ }
+
+out_err:
+ i915_ttm_migrate_set_failure_modes(false, false);
+ i915_ttm_migrate_set_ban_memcpy(false);
+ return ret;
+}
+
+int i915_gem_migrate_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_smem_create_migrate),
+ SUBTEST(igt_lmem_create_migrate),
+ SUBTEST(igt_same_create_migrate),
+ SUBTEST(igt_lmem_pages_failsafe_migrate),
+ SUBTEST(igt_lmem_async_migrate),
+ };
+
+ if (!HAS_LMEM(i915))
+ return 0;
+
+ return intel_gt_live_subtests(tests, to_gt(i915));
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
new file mode 100644
index 0000000000..72957a36a3
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -0,0 +1,1850 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include <linux/highmem.h>
+#include <linux/prime_numbers.h>
+
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_region.h"
+#include "gem/i915_gem_ttm.h"
+#include "gem/i915_gem_ttm_move.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt.h"
+#include "gt/intel_gt_pm.h"
+#include "gt/intel_migrate.h"
+#include "i915_reg.h"
+#include "i915_ttm_buddy_manager.h"
+
+#include "huge_gem_object.h"
+#include "i915_selftest.h"
+#include "selftests/i915_random.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/igt_reset.h"
+#include "selftests/igt_mmap.h"
+
+struct tile {
+ unsigned int width;
+ unsigned int height;
+ unsigned int stride;
+ unsigned int size;
+ unsigned int tiling;
+ unsigned int swizzle;
+};
+
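+/* Extract the selected address bit and fold it down into bit 6. */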
+static u64 swizzle_bit(unsigned int bit, u64 offset)
+{
+ return (offset & BIT_ULL(bit)) >> (bit - 6);
+}
+
+static u64 tiled_offset(const struct tile *tile, u64 v)
+{
+ u64 x, y;
+
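+	/*
+	 * Split the linear offset into its x/y position within the surface,
+	 * rebuild the address as the fence would tile it, then apply the
+	 * platform's bit-6 swizzle pattern.
+	 */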
+ if (tile->tiling == I915_TILING_NONE)
+ return v;
+
+ y = div64_u64_rem(v, tile->stride, &x);
+ v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;
+
+ if (tile->tiling == I915_TILING_X) {
+ v += y * tile->width;
+ v += div64_u64_rem(x, tile->width, &x) << tile->size;
+ v += x;
+ } else if (tile->width == 128) {
+ const unsigned int ytile_span = 16;
+ const unsigned int ytile_height = 512;
+
+ v += y * ytile_span;
+ v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+ v += x;
+ } else {
+ const unsigned int ytile_span = 32;
+ const unsigned int ytile_height = 256;
+
+ v += y * ytile_span;
+ v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
+ v += x;
+ }
+
+ switch (tile->swizzle) {
+ case I915_BIT_6_SWIZZLE_9:
+ v ^= swizzle_bit(9, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
+ break;
+ case I915_BIT_6_SWIZZLE_9_10_11:
+ v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
+ break;
+ }
+
+ return v;
+}
+
+static int check_partial_mapping(struct drm_i915_gem_object *obj,
+ const struct tile *tile,
+ struct rnd_state *prng)
+{
+ const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_gtt_view view;
+ struct i915_vma *vma;
+ unsigned long offset;
+ unsigned long page;
+ u32 __iomem *io;
+ struct page *p;
+ unsigned int n;
+ u32 *cpu;
+ int err;
+
+ err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+ if (err) {
+ pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+ tile->tiling, tile->stride, err);
+ return err;
+ }
+
+ GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+ GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+ i915_gem_object_lock(obj, NULL);
+ err = i915_gem_object_set_to_gtt_domain(obj, true);
+ i915_gem_object_unlock(obj);
+ if (err) {
+ pr_err("Failed to flush to GTT write domain; err=%d\n", err);
+ return err;
+ }
+
+ page = i915_prandom_u32_max_state(npages, prng);
+ view = compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma)) {
+ pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+ page, (int)PTR_ERR(vma));
+ return PTR_ERR(vma);
+ }
+
+ n = page - view.partial.offset;
+ GEM_BUG_ON(n >= view.partial.size);
+
+ io = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(io)) {
+ pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+ page, (int)PTR_ERR(io));
+ err = PTR_ERR(io);
+ goto out;
+ }
+
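+	/*
+	 * Write the page index through the partial GGTT view, then read it
+	 * back via the CPU at the manually swizzled offset to verify the
+	 * fence tiling matches our model.
+	 */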
+ iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+ i915_vma_unpin_iomap(vma);
+
+ offset = tiled_offset(tile, page << PAGE_SHIFT);
+ if (offset >= obj->base.size)
+ goto out;
+
+ intel_gt_flush_ggtt_writes(to_gt(i915));
+
+ p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+ cpu = kmap(p) + offset_in_page(offset);
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+ if (*cpu != (u32)page) {
+ pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%lu + %u [0x%lx]) of 0x%x, found 0x%x\n",
+ page, n,
+ view.partial.offset,
+ view.partial.size,
+ vma->size >> PAGE_SHIFT,
+ tile->tiling ? tile_row_pages(obj) : 0,
+ vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+ offset >> PAGE_SHIFT,
+ (unsigned int)offset_in_page(offset),
+ offset,
+ (u32)page, *cpu);
+ err = -EINVAL;
+ }
+ *cpu = 0;
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+ kunmap(p);
+
+out:
+ i915_gem_object_lock(obj, NULL);
+ i915_vma_destroy(vma);
+ i915_gem_object_unlock(obj);
+ return err;
+}
+
+static int check_partial_mappings(struct drm_i915_gem_object *obj,
+ const struct tile *tile,
+ unsigned long end_time)
+{
+ const unsigned int nreal = obj->scratch / PAGE_SIZE;
+ const unsigned long npages = obj->base.size / PAGE_SIZE;
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct i915_vma *vma;
+ unsigned long page;
+ int err;
+
+ err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
+ if (err) {
+ pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
+ tile->tiling, tile->stride, err);
+ return err;
+ }
+
+ GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
+ GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);
+
+ i915_gem_object_lock(obj, NULL);
+ err = i915_gem_object_set_to_gtt_domain(obj, true);
+ i915_gem_object_unlock(obj);
+ if (err) {
+ pr_err("Failed to flush to GTT write domain; err=%d\n", err);
+ return err;
+ }
+
+ for_each_prime_number_from(page, 1, npages) {
+ struct i915_gtt_view view =
+ compute_partial_view(obj, page, MIN_CHUNK_PAGES);
+ unsigned long offset;
+ u32 __iomem *io;
+ struct page *p;
+ unsigned int n;
+ u32 *cpu;
+
+ GEM_BUG_ON(view.partial.size > nreal);
+ cond_resched();
+
+ vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma)) {
+ pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
+ page, (int)PTR_ERR(vma));
+ return PTR_ERR(vma);
+ }
+
+ n = page - view.partial.offset;
+ GEM_BUG_ON(n >= view.partial.size);
+
+ io = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(io)) {
+ pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
+ page, (int)PTR_ERR(io));
+ return PTR_ERR(io);
+ }
+
+ iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
+ i915_vma_unpin_iomap(vma);
+
+ offset = tiled_offset(tile, page << PAGE_SHIFT);
+ if (offset >= obj->base.size)
+ continue;
+
+ intel_gt_flush_ggtt_writes(to_gt(i915));
+
+ p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
+ cpu = kmap(p) + offset_in_page(offset);
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+ if (*cpu != (u32)page) {
+ pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%lu + %u [0x%lx]) of 0x%x, found 0x%x\n",
+ page, n,
+ view.partial.offset,
+ view.partial.size,
+ vma->size >> PAGE_SHIFT,
+ tile->tiling ? tile_row_pages(obj) : 0,
+ vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
+ offset >> PAGE_SHIFT,
+ (unsigned int)offset_in_page(offset),
+ offset,
+ (u32)page, *cpu);
+ err = -EINVAL;
+ }
+ *cpu = 0;
+ drm_clflush_virt_range(cpu, sizeof(*cpu));
+ kunmap(p);
+ if (err)
+ return err;
+
+ i915_gem_object_lock(obj, NULL);
+ i915_vma_destroy(vma);
+ i915_gem_object_unlock(obj);
+
+ if (igt_timeout(end_time,
+ "%s: timed out after tiling=%d stride=%d\n",
+ __func__, tile->tiling, tile->stride))
+ return -EINTR;
+ }
+
+ return 0;
+}
+
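+/*
+ * Fill in the tile geometry for the given tiling mode and platform, and
+ * return the maximum pitch, in tile widths, supported by the fence
+ * registers.
+ */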
+static unsigned int
+setup_tile_size(struct tile *tile, struct drm_i915_private *i915)
+{
+ if (GRAPHICS_VER(i915) <= 2) {
+ tile->height = 16;
+ tile->width = 128;
+ tile->size = 11;
+ } else if (tile->tiling == I915_TILING_Y &&
+ HAS_128_BYTE_Y_TILING(i915)) {
+ tile->height = 32;
+ tile->width = 128;
+ tile->size = 12;
+ } else {
+ tile->height = 8;
+ tile->width = 512;
+ tile->size = 12;
+ }
+
+ if (GRAPHICS_VER(i915) < 4)
+ return 8192 / tile->width;
+ else if (GRAPHICS_VER(i915) < 7)
+ return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width;
+ else
+ return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width;
+}
+
+static int igt_partial_tiling(void *arg)
+{
+ const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ intel_wakeref_t wakeref;
+ int tiling;
+ int err;
+
+ if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
+ return 0;
+
+ /* We want to check the page mapping and fencing of a large object
+ * mmapped through the GTT. The object we create is larger than can
+	 * possibly be mmapped as a whole, and so we must use partial GGTT vmas.
+ * We then check that a write through each partial GGTT vma ends up
+ * in the right set of pages within the object, and with the expected
+ * tiling, which we verify by manual swizzling.
+ */
+
+ obj = huge_gem_object(i915,
+ nreal << PAGE_SHIFT,
+ (1 + next_prime_number(to_gt(i915)->ggtt->vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+ nreal, obj->base.size / PAGE_SIZE, err);
+ goto out;
+ }
+
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ if (1) {
+ IGT_TIMEOUT(end);
+ struct tile tile;
+
+ tile.height = 1;
+ tile.width = 1;
+ tile.size = 0;
+ tile.stride = 0;
+ tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+ tile.tiling = I915_TILING_NONE;
+
+ err = check_partial_mappings(obj, &tile, end);
+ if (err && err != -EINTR)
+ goto out_unlock;
+ }
+
+ for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
+ IGT_TIMEOUT(end);
+ unsigned int max_pitch;
+ unsigned int pitch;
+ struct tile tile;
+
+ if (i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
+ /*
+ * The swizzling pattern is actually unknown as it
+ * varies based on physical address of each page.
+ * See i915_gem_detect_bit_6_swizzle().
+ */
+ break;
+
+ tile.tiling = tiling;
+ switch (tiling) {
+ case I915_TILING_X:
+ tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_x;
+ break;
+ case I915_TILING_Y:
+ tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_y;
+ break;
+ }
+
+ GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
+ if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+ tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+ continue;
+
+ max_pitch = setup_tile_size(&tile, i915);
+
+ for (pitch = max_pitch; pitch; pitch >>= 1) {
+ tile.stride = tile.width * pitch;
+ err = check_partial_mappings(obj, &tile, end);
+ if (err == -EINTR)
+ goto next_tiling;
+ if (err)
+ goto out_unlock;
+
+ if (pitch > 2 && GRAPHICS_VER(i915) >= 4) {
+ tile.stride = tile.width * (pitch - 1);
+ err = check_partial_mappings(obj, &tile, end);
+ if (err == -EINTR)
+ goto next_tiling;
+ if (err)
+ goto out_unlock;
+ }
+
+ if (pitch < max_pitch && GRAPHICS_VER(i915) >= 4) {
+ tile.stride = tile.width * (pitch + 1);
+ err = check_partial_mappings(obj, &tile, end);
+ if (err == -EINTR)
+ goto next_tiling;
+ if (err)
+ goto out_unlock;
+ }
+ }
+
+ if (GRAPHICS_VER(i915) >= 4) {
+ for_each_prime_number(pitch, max_pitch) {
+ tile.stride = tile.width * pitch;
+ err = check_partial_mappings(obj, &tile, end);
+ if (err == -EINTR)
+ goto next_tiling;
+ if (err)
+ goto out_unlock;
+ }
+ }
+
+next_tiling: ;
+ }
+
+out_unlock:
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ i915_gem_object_unpin_pages(obj);
+out:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+static int igt_smoke_tiling(void *arg)
+{
+ const unsigned int nreal = 1 << 12; /* largest tile row x2 */
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ intel_wakeref_t wakeref;
+ I915_RND_STATE(prng);
+ unsigned long count;
+ IGT_TIMEOUT(end);
+ int err;
+
+ if (!i915_ggtt_has_aperture(to_gt(i915)->ggtt))
+ return 0;
+
+ /*
+	 * igt_partial_tiling() does an exhaustive check of partial tiling
+	 * chunking, but will undoubtedly run out of time. Here, we do a
+	 * randomised search and hope that over many 1s runs with different
+	 * seeds we will do a thorough check.
+ *
+ * Remember to look at the st_seed if we see a flip-flop in BAT!
+ */
+
+ if (i915->gem_quirks & GEM_QUIRK_PIN_SWIZZLED_PAGES)
+ return 0;
+
+ obj = huge_gem_object(i915,
+ nreal << PAGE_SHIFT,
+ (1 + next_prime_number(to_gt(i915)->ggtt->vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
+ nreal, obj->base.size / PAGE_SIZE, err);
+ goto out;
+ }
+
+ wakeref = intel_runtime_pm_get(&i915->runtime_pm);
+
+ count = 0;
+ do {
+ struct tile tile;
+
+ tile.tiling =
+ i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng);
+ switch (tile.tiling) {
+ case I915_TILING_NONE:
+ tile.height = 1;
+ tile.width = 1;
+ tile.size = 0;
+ tile.stride = 0;
+ tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
+ break;
+
+ case I915_TILING_X:
+ tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_x;
+ break;
+ case I915_TILING_Y:
+ tile.swizzle = to_gt(i915)->ggtt->bit_6_swizzle_y;
+ break;
+ }
+
+ if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
+ tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
+ continue;
+
+ if (tile.tiling != I915_TILING_NONE) {
+ unsigned int max_pitch = setup_tile_size(&tile, i915);
+
+ tile.stride =
+ i915_prandom_u32_max_state(max_pitch, &prng);
+ tile.stride = (1 + tile.stride) * tile.width;
+ if (GRAPHICS_VER(i915) < 4)
+ tile.stride = rounddown_pow_of_two(tile.stride);
+ }
+
+ err = check_partial_mapping(obj, &tile, &prng);
+ if (err)
+ break;
+
+ count++;
+ } while (!__igt_timeout(end, NULL));
+
+ pr_info("%s: Completed %lu trials\n", __func__, count);
+
+ intel_runtime_pm_put(&i915->runtime_pm, wakeref);
+ i915_gem_object_unpin_pages(obj);
+out:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+static int make_obj_busy(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ struct intel_engine_cs *engine;
+
+ for_each_uabi_engine(engine, i915) {
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
+ int err;
+
+ vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto err;
+
+ rq = intel_engine_create_kernel_request(engine);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_unpin;
+ }
+
+ err = i915_vma_move_to_active(vma, rq,
+ EXEC_OBJECT_WRITE);
+
+ i915_request_add(rq);
+err_unpin:
+ i915_vma_unpin(vma);
+err:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ if (err)
+ return err;
+ }
+
+ i915_gem_object_put(obj); /* leave it only alive via its active ref */
+ return 0;
+}
+
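+/*
+ * Discrete parts only support the FIXED mmap type; otherwise default to
+ * the legacy GTT aperture mapping.
+ */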
+static enum i915_mmap_type default_mapping(struct drm_i915_private *i915)
+{
+ if (HAS_LMEM(i915))
+ return I915_MMAP_TYPE_FIXED;
+
+ return I915_MMAP_TYPE_GTT;
+}
+
+static struct drm_i915_gem_object *
+create_sys_or_internal(struct drm_i915_private *i915,
+ unsigned long size)
+{
+ if (HAS_LMEM(i915)) {
+ struct intel_memory_region *sys_region =
+ i915->mm.regions[INTEL_REGION_SMEM];
+
+ return __i915_gem_object_create_user(i915, size, &sys_region, 1);
+ }
+
+ return i915_gem_object_create_internal(i915, size);
+}
+
+static bool assert_mmap_offset(struct drm_i915_private *i915,
+ unsigned long size,
+ int expected)
+{
+ struct drm_i915_gem_object *obj;
+ u64 offset;
+ int ret;
+
+ obj = create_sys_or_internal(i915, size);
+ if (IS_ERR(obj))
+ return expected && expected == PTR_ERR(obj);
+
+ ret = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL);
+ i915_gem_object_put(obj);
+
+ return ret == expected;
+}
+
+static void disable_retire_worker(struct drm_i915_private *i915)
+{
+ i915_gem_driver_unregister__shrinker(i915);
+ intel_gt_pm_get(to_gt(i915));
+ cancel_delayed_work_sync(&to_gt(i915)->requests.retire_work);
+}
+
+static void restore_retire_worker(struct drm_i915_private *i915)
+{
+ igt_flush_test(i915);
+ intel_gt_pm_put(to_gt(i915));
+ i915_gem_driver_register__shrinker(i915);
+}
+
+static void mmap_offset_lock(struct drm_i915_private *i915)
+ __acquires(&i915->drm.vma_offset_manager->vm_lock)
+{
+ write_lock(&i915->drm.vma_offset_manager->vm_lock);
+}
+
+static void mmap_offset_unlock(struct drm_i915_private *i915)
+ __releases(&i915->drm.vma_offset_manager->vm_lock)
+{
+ write_unlock(&i915->drm.vma_offset_manager->vm_lock);
+}
+
+static int igt_mmap_offset_exhaustion(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
+ struct drm_i915_gem_object *obj;
+ struct drm_mm_node *hole, *next;
+ int loop, err = 0;
+ u64 offset;
+ int enospc = HAS_LMEM(i915) ? -ENXIO : -ENOSPC;
+
+ /* Disable background reaper */
+ disable_retire_worker(i915);
+ GEM_BUG_ON(!to_gt(i915)->awake);
+ intel_gt_retire_requests(to_gt(i915));
+ i915_gem_drain_freed_objects(i915);
+
+ /* Trim the device mmap space to only a page */
+ mmap_offset_lock(i915);
+ loop = 1; /* PAGE_SIZE units */
+ list_for_each_entry_safe(hole, next, &mm->hole_stack, hole_stack) {
+ struct drm_mm_node *resv;
+
+ resv = kzalloc(sizeof(*resv), GFP_NOWAIT);
+ if (!resv) {
+ err = -ENOMEM;
+ goto out_park;
+ }
+
+ resv->start = drm_mm_hole_node_start(hole) + loop;
+ resv->size = hole->hole_size - loop;
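+		/* Tag our reservations so the cleanup loop below can find them. */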
+ resv->color = -1ul;
+ loop = 0;
+
+ if (!resv->size) {
+ kfree(resv);
+ continue;
+ }
+
+ pr_debug("Reserving hole [%llx + %llx]\n",
+ resv->start, resv->size);
+
+ err = drm_mm_reserve_node(mm, resv);
+ if (err) {
+ pr_err("Failed to trim VMA manager, err=%d\n", err);
+ kfree(resv);
+ goto out_park;
+ }
+ }
+ GEM_BUG_ON(!list_is_singular(&mm->hole_stack));
+ mmap_offset_unlock(i915);
+
+ /* Just fits! */
+ if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
+ pr_err("Unable to insert object into single page hole\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Too large */
+ if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, enospc)) {
+ pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Fill the hole, further allocation attempts should then fail */
+ obj = create_sys_or_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ pr_err("Unable to create object for reclaimed hole\n");
+ goto out;
+ }
+
+ err = __assign_mmap_offset(obj, default_mapping(i915), &offset, NULL);
+ if (err) {
+ pr_err("Unable to insert object into reclaimed hole\n");
+ goto err_obj;
+ }
+
+ if (!assert_mmap_offset(i915, PAGE_SIZE, enospc)) {
+ pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
+ err = -EINVAL;
+ goto err_obj;
+ }
+
+ i915_gem_object_put(obj);
+
+ /* Now fill with busy dead objects that we expect to reap */
+ for (loop = 0; loop < 3; loop++) {
+ if (intel_gt_is_wedged(to_gt(i915)))
+ break;
+
+ obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto out;
+ }
+
+ err = make_obj_busy(obj);
+ if (err) {
+ pr_err("[loop %d] Failed to busy the object\n", loop);
+ goto err_obj;
+ }
+ }
+
+out:
+ mmap_offset_lock(i915);
+out_park:
+ drm_mm_for_each_node_safe(hole, next, mm) {
+ if (hole->color != -1ul)
+ continue;
+
+ drm_mm_remove_node(hole);
+ kfree(hole);
+ }
+ mmap_offset_unlock(i915);
+ restore_retire_worker(i915);
+ return err;
+err_obj:
+ i915_gem_object_put(obj);
+ goto out;
+}
+
+static int gtt_set(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+ void __iomem *map;
+ int err = 0;
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ intel_gt_pm_get(vma->vm->gt);
+ map = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto out;
+ }
+
+ memset_io(map, POISON_INUSE, obj->base.size);
+ i915_vma_unpin_iomap(vma);
+
+out:
+ intel_gt_pm_put(vma->vm->gt);
+ return err;
+}
+
+static int gtt_check(struct drm_i915_gem_object *obj)
+{
+ struct i915_vma *vma;
+ void __iomem *map;
+ int err = 0;
+
+ vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
+
+ intel_gt_pm_get(vma->vm->gt);
+ map = i915_vma_pin_iomap(vma);
+ i915_vma_unpin(vma);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto out;
+ }
+
+ if (memchr_inv((void __force *)map, POISON_FREE, obj->base.size)) {
+ pr_err("%s: Write via mmap did not land in backing store (GTT)\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ }
+ i915_vma_unpin_iomap(vma);
+
+out:
+ intel_gt_pm_put(vma->vm->gt);
+ return err;
+}
+
+static int wc_set(struct drm_i915_gem_object *obj)
+{
+ void *vaddr;
+
+ vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ memset(vaddr, POISON_INUSE, obj->base.size);
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ return 0;
+}
+
+static int wc_check(struct drm_i915_gem_object *obj)
+{
+ void *vaddr;
+ int err = 0;
+
+ vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr))
+ return PTR_ERR(vaddr);
+
+ if (memchr_inv(vaddr, POISON_FREE, obj->base.size)) {
+ pr_err("%s: Write via mmap did not land in backing store (WC)\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ }
+ i915_gem_object_unpin_map(obj);
+
+ return err;
+}
+
+static bool can_mmap(struct drm_i915_gem_object *obj, enum i915_mmap_type type)
+{
+ struct drm_i915_private *i915 = to_i915(obj->base.dev);
+ bool no_map;
+
+ if (obj->ops->mmap_offset)
+ return type == I915_MMAP_TYPE_FIXED;
+ else if (type == I915_MMAP_TYPE_FIXED)
+ return false;
+
+ if (type == I915_MMAP_TYPE_GTT &&
+ !i915_ggtt_has_aperture(to_gt(i915)->ggtt))
+ return false;
+
+ i915_gem_object_lock(obj, NULL);
+ no_map = (type != I915_MMAP_TYPE_GTT &&
+ !i915_gem_object_has_struct_page(obj) &&
+ !i915_gem_object_has_iomem(obj));
+ i915_gem_object_unlock(obj);
+
+ return !no_map;
+}
+
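+/* Replicate a byte value into each byte of a u32. */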
+#define expand32(x) (((x) << 0) | ((x) << 8) | ((x) << 16) | ((x) << 24))
+static int __igt_mmap(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ struct vm_area_struct *area;
+ unsigned long addr;
+ int err, i;
+ u64 offset;
+
+ if (!can_mmap(obj, type))
+ return 0;
+
+ err = wc_set(obj);
+ if (err == -ENXIO)
+ err = gtt_set(obj);
+ if (err)
+ return err;
+
+ err = __assign_mmap_offset(obj, type, &offset, NULL);
+ if (err)
+ return err;
+
+ addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ pr_debug("igt_mmap(%s, %d) @ %lx\n", obj->mm.region->name, type, addr);
+
+ mmap_read_lock(current->mm);
+ area = vma_lookup(current->mm, addr);
+ mmap_read_unlock(current->mm);
+ if (!area) {
+ pr_err("%s: Did not create a vm_area_struct for the mmap\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ for (i = 0; i < obj->base.size / sizeof(u32); i++) {
+ u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
+ u32 x;
+
+ if (get_user(x, ux)) {
+ pr_err("%s: Unable to read from mmap, offset:%zd\n",
+ obj->mm.region->name, i * sizeof(x));
+ err = -EFAULT;
+ goto out_unmap;
+ }
+
+ if (x != expand32(POISON_INUSE)) {
+ pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
+ obj->mm.region->name,
+ i * sizeof(x), x, expand32(POISON_INUSE));
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ x = expand32(POISON_FREE);
+ if (put_user(x, ux)) {
+ pr_err("%s: Unable to write to mmap, offset:%zd\n",
+ obj->mm.region->name, i * sizeof(x));
+ err = -EFAULT;
+ goto out_unmap;
+ }
+ }
+
+ if (type == I915_MMAP_TYPE_GTT)
+ intel_gt_flush_ggtt_writes(to_gt(i915));
+
+ err = wc_check(obj);
+ if (err == -ENXIO)
+ err = gtt_check(obj);
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+static int igt_mmap(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ unsigned long sizes[] = {
+ PAGE_SIZE,
+ mr->min_page_size,
+ SZ_4M,
+ };
+ int i;
+
+ if (mr->private)
+ continue;
+
+ for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ obj = __i915_gem_object_create_user(i915, sizes[i], &mr, 1);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap(i915, obj, I915_MMAP_TYPE_FIXED);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static void igt_close_objects(struct drm_i915_private *i915,
+ struct list_head *objects)
+{
+ struct drm_i915_gem_object *obj, *on;
+
+ list_for_each_entry_safe(obj, on, objects, st_link) {
+ i915_gem_object_lock(obj, NULL);
+ if (i915_gem_object_has_pinned_pages(obj))
+ i915_gem_object_unpin_pages(obj);
+ /* No polluting the memory region between tests */
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_unlock(obj);
+ list_del(&obj->st_link);
+ i915_gem_object_put(obj);
+ }
+
+ cond_resched();
+
+ i915_gem_drain_freed_objects(i915);
+}
+
+static void igt_make_evictable(struct list_head *objects)
+{
+ struct drm_i915_gem_object *obj;
+
+ list_for_each_entry(obj, objects, st_link) {
+ i915_gem_object_lock(obj, NULL);
+ if (i915_gem_object_has_pinned_pages(obj))
+ i915_gem_object_unpin_pages(obj);
+ i915_gem_object_unlock(obj);
+ }
+
+ cond_resched();
+}
+
+static int igt_fill_mappable(struct intel_memory_region *mr,
+ struct list_head *objects)
+{
+ u64 size, total;
+ int err;
+
+ total = 0;
+ size = mr->io_size;
+ do {
+ struct drm_i915_gem_object *obj;
+
+ obj = i915_gem_object_create_region(mr, size, 0, 0);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ goto err_close;
+ }
+
+ list_add(&obj->st_link, objects);
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ if (err != -ENXIO && err != -ENOMEM)
+ goto err_close;
+
+ if (size == mr->min_page_size) {
+ err = 0;
+ break;
+ }
+
+ size >>= 1;
+ continue;
+ }
+
+ total += obj->base.size;
+ } while (1);
+
+ pr_info("%s filled=%lluMiB\n", __func__, total >> 20);
+ return 0;
+
+err_close:
+ igt_close_objects(mr->i915, objects);
+ return err;
+}
+
+static int ___igt_mmap_migrate(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ unsigned long addr,
+ bool unfaultable)
+{
+ struct vm_area_struct *area;
+ int err = 0, i;
+
+ pr_info("igt_mmap(%s, %d) @ %lx\n",
+ obj->mm.region->name, I915_MMAP_TYPE_FIXED, addr);
+
+ mmap_read_lock(current->mm);
+ area = vma_lookup(current->mm, addr);
+ mmap_read_unlock(current->mm);
+ if (!area) {
+ pr_err("%s: Did not create a vm_area_struct for the mmap\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ for (i = 0; i < obj->base.size / sizeof(u32); i++) {
+ u32 __user *ux = u64_to_user_ptr((u64)(addr + i * sizeof(*ux)));
+ u32 x;
+
+ if (get_user(x, ux)) {
+ err = -EFAULT;
+ if (!unfaultable) {
+ pr_err("%s: Unable to read from mmap, offset:%zd\n",
+ obj->mm.region->name, i * sizeof(x));
+ goto out_unmap;
+ }
+
+ continue;
+ }
+
+ if (unfaultable) {
+ pr_err("%s: Faulted unmappable memory\n",
+ obj->mm.region->name);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ if (x != expand32(POISON_INUSE)) {
+ pr_err("%s: Read incorrect value from mmap, offset:%zd, found:%x, expected:%x\n",
+ obj->mm.region->name,
+ i * sizeof(x), x, expand32(POISON_INUSE));
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+ x = expand32(POISON_FREE);
+ if (put_user(x, ux)) {
+ pr_err("%s: Unable to write to mmap, offset:%zd\n",
+ obj->mm.region->name, i * sizeof(x));
+ err = -EFAULT;
+ goto out_unmap;
+ }
+ }
+
+ if (unfaultable) {
+ if (err == -EFAULT)
+ err = 0;
+ } else {
+ obj->flags &= ~I915_BO_ALLOC_GPU_ONLY;
+ err = wc_check(obj);
+ }
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+#define IGT_MMAP_MIGRATE_TOPDOWN (1 << 0)
+#define IGT_MMAP_MIGRATE_FILL (1 << 1)
+#define IGT_MMAP_MIGRATE_EVICTABLE (1 << 2)
+#define IGT_MMAP_MIGRATE_UNFAULTABLE (1 << 3)
+#define IGT_MMAP_MIGRATE_FAIL_GPU (1 << 4)
+static int __igt_mmap_migrate(struct intel_memory_region **placements,
+ int n_placements,
+ struct intel_memory_region *expected_mr,
+ unsigned int flags)
+{
+ struct drm_i915_private *i915 = placements[0]->i915;
+ struct drm_i915_gem_object *obj;
+ struct i915_request *rq = NULL;
+ unsigned long addr;
+ LIST_HEAD(objects);
+ u64 offset;
+ int err;
+
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
+ placements,
+ n_placements);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ if (flags & IGT_MMAP_MIGRATE_TOPDOWN)
+ obj->flags |= I915_BO_ALLOC_GPU_ONLY;
+
+ err = __assign_mmap_offset(obj, I915_MMAP_TYPE_FIXED, &offset, NULL);
+ if (err)
+ goto out_put;
+
+ /*
+	 * This will eventually create a GEM context, due to opening a dummy
+	 * drm file, which needs a tiny amount of mappable device memory for
+	 * the top level paging structures (and perhaps scratch), so make sure
+	 * we allocate early, to avoid tears.
+ */
+ addr = igt_mmap_offset(i915, offset, obj->base.size,
+ PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr)) {
+ err = addr;
+ goto out_put;
+ }
+
+ if (flags & IGT_MMAP_MIGRATE_FILL) {
+ err = igt_fill_mappable(placements[0], &objects);
+ if (err)
+ goto out_put;
+ }
+
+ err = i915_gem_object_lock(obj, NULL);
+ if (err)
+ goto out_put;
+
+ err = i915_gem_object_pin_pages(obj);
+ if (err) {
+ i915_gem_object_unlock(obj);
+ goto out_put;
+ }
+
+ err = intel_context_migrate_clear(to_gt(i915)->migrate.context, NULL,
+ obj->mm.pages->sgl, obj->pat_index,
+ i915_gem_object_is_lmem(obj),
+ expand32(POISON_INUSE), &rq);
+ i915_gem_object_unpin_pages(obj);
+ if (rq) {
+ err = dma_resv_reserve_fences(obj->base.resv, 1);
+ if (!err)
+ dma_resv_add_fence(obj->base.resv, &rq->fence,
+ DMA_RESV_USAGE_KERNEL);
+ i915_request_put(rq);
+ }
+ i915_gem_object_unlock(obj);
+ if (err)
+ goto out_put;
+
+ if (flags & IGT_MMAP_MIGRATE_EVICTABLE)
+ igt_make_evictable(&objects);
+
+ if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) {
+ err = i915_gem_object_lock(obj, NULL);
+ if (err)
+ goto out_put;
+
+ /*
+		 * Ensure we only simulate the gpu failure when faulting the
+ * pages.
+ */
+ err = i915_gem_object_wait_moving_fence(obj, true);
+ i915_gem_object_unlock(obj);
+ if (err)
+ goto out_put;
+ i915_ttm_migrate_set_failure_modes(true, false);
+ }
+
+ err = ___igt_mmap_migrate(i915, obj, addr,
+ flags & IGT_MMAP_MIGRATE_UNFAULTABLE);
+
+ if (!err && obj->mm.region != expected_mr) {
+ pr_err("%s region mismatch %s\n", __func__, expected_mr->name);
+ err = -EINVAL;
+ }
+
+ if (flags & IGT_MMAP_MIGRATE_FAIL_GPU) {
+ struct intel_gt *gt;
+ unsigned int id;
+
+ i915_ttm_migrate_set_failure_modes(false, false);
+
+ for_each_gt(gt, i915, id) {
+ intel_wakeref_t wakeref;
+ bool wedged;
+
+ mutex_lock(&gt->reset.mutex);
+ wedged = test_bit(I915_WEDGED, &gt->reset.flags);
+ mutex_unlock(&gt->reset.mutex);
+ if (!wedged) {
+ pr_err("gt(%u) not wedged\n", id);
+ err = -EINVAL;
+ continue;
+ }
+
+ wakeref = intel_runtime_pm_get(gt->uncore->rpm);
+ igt_global_reset_lock(gt);
+ intel_gt_reset(gt, ALL_ENGINES, NULL);
+ igt_global_reset_unlock(gt);
+ intel_runtime_pm_put(gt->uncore->rpm, wakeref);
+ }
+
+ if (!i915_gem_object_has_unknown_state(obj)) {
+ pr_err("object missing unknown_state\n");
+ err = -EINVAL;
+ }
+ }
+
+out_put:
+ i915_gem_object_put(obj);
+ igt_close_objects(i915, &objects);
+ return err;
+}
+
+static int igt_mmap_migrate(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *system = i915->mm.regions[INTEL_REGION_SMEM];
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct intel_memory_region *mixed[] = { mr, system };
+ struct intel_memory_region *single[] = { mr };
+ struct ttm_resource_manager *man = mr->region_private;
+ resource_size_t saved_io_size;
+ int err;
+
+ if (mr->private)
+ continue;
+
+ if (!mr->io_size)
+ continue;
+
+ /*
+ * For testing purposes let's force small BAR, if not already
+ * present.
+ */
+ saved_io_size = mr->io_size;
+ if (mr->io_size == mr->total) {
+ resource_size_t io_size = mr->io_size;
+
+ io_size = rounddown_pow_of_two(io_size >> 1);
+ if (io_size < PAGE_SIZE)
+ continue;
+
+ mr->io_size = io_size;
+ i915_ttm_buddy_man_force_visible_size(man,
+ io_size >> PAGE_SHIFT);
+ }
+
+ /*
+		 * Allocate in the mappable portion, should be no surprises here.
+ */
+ err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), mr, 0);
+ if (err)
+ goto out_io_size;
+
+ /*
+ * Allocate in the non-mappable portion, but force migrating to
+ * the mappable portion on fault (LMEM -> LMEM)
+ */
+ err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
+ IGT_MMAP_MIGRATE_TOPDOWN |
+ IGT_MMAP_MIGRATE_FILL |
+ IGT_MMAP_MIGRATE_EVICTABLE);
+ if (err)
+ goto out_io_size;
+
+ /*
+ * Allocate in the non-mappable portion, but force spilling into
+ * system memory on fault (LMEM -> SMEM)
+ */
+ err = __igt_mmap_migrate(mixed, ARRAY_SIZE(mixed), system,
+ IGT_MMAP_MIGRATE_TOPDOWN |
+ IGT_MMAP_MIGRATE_FILL);
+ if (err)
+ goto out_io_size;
+
+ /*
+ * Allocate in the non-mappable portion, but since the mappable
+ * portion is already full, and we can't spill to system memory,
+ * then we should expect the fault to fail.
+ */
+ err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
+ IGT_MMAP_MIGRATE_TOPDOWN |
+ IGT_MMAP_MIGRATE_FILL |
+ IGT_MMAP_MIGRATE_UNFAULTABLE);
+ if (err)
+ goto out_io_size;
+
+ /*
+ * Allocate in the non-mappable portion, but force migrating to
+ * the mappable portion on fault (LMEM -> LMEM). We then also
+ * simulate a gpu error when moving the pages when faulting the
+ * pages, which should result in wedging the gpu and returning
+ * SIGBUS in the fault handler, since we can't fallback to
+ * memcpy.
+ */
+ err = __igt_mmap_migrate(single, ARRAY_SIZE(single), mr,
+ IGT_MMAP_MIGRATE_TOPDOWN |
+ IGT_MMAP_MIGRATE_FILL |
+ IGT_MMAP_MIGRATE_EVICTABLE |
+ IGT_MMAP_MIGRATE_FAIL_GPU |
+ IGT_MMAP_MIGRATE_UNFAULTABLE);
+out_io_size:
+ mr->io_size = saved_io_size;
+ i915_ttm_buddy_man_force_visible_size(man,
+ mr->io_size >> PAGE_SHIFT);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static const char *repr_mmap_type(enum i915_mmap_type type)
+{
+ switch (type) {
+ case I915_MMAP_TYPE_GTT: return "gtt";
+ case I915_MMAP_TYPE_WB: return "wb";
+ case I915_MMAP_TYPE_WC: return "wc";
+ case I915_MMAP_TYPE_UC: return "uc";
+ case I915_MMAP_TYPE_FIXED: return "fixed";
+ default: return "unknown";
+ }
+}
+
+static bool can_access(struct drm_i915_gem_object *obj)
+{
+ bool access;
+
+ i915_gem_object_lock(obj, NULL);
+ access = i915_gem_object_has_struct_page(obj) ||
+ i915_gem_object_has_iomem(obj);
+ i915_gem_object_unlock(obj);
+
+ return access;
+}
+
+static int __igt_mmap_access(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ unsigned long __user *ptr;
+ unsigned long A, B;
+ unsigned long x, y;
+ unsigned long addr;
+ int err;
+ u64 offset;
+
+ memset(&A, 0xAA, sizeof(A));
+ memset(&B, 0xBB, sizeof(B));
+
+ if (!can_mmap(obj, type) || !can_access(obj))
+ return 0;
+
+ err = __assign_mmap_offset(obj, type, &offset, NULL);
+ if (err)
+ return err;
+
+ addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+ ptr = (unsigned long __user *)addr;
+
+ err = __put_user(A, ptr);
+ if (err) {
+ pr_err("%s(%s): failed to write into user mmap\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ intel_gt_flush_ggtt_writes(to_gt(i915));
+
+ err = access_process_vm(current, addr, &x, sizeof(x), 0);
+ if (err != sizeof(x)) {
+ pr_err("%s(%s): access_process_vm() read failed\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ err = access_process_vm(current, addr, &B, sizeof(B), FOLL_WRITE);
+ if (err != sizeof(B)) {
+ pr_err("%s(%s): access_process_vm() write failed\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ intel_gt_flush_ggtt_writes(to_gt(i915));
+
+ err = __get_user(y, ptr);
+ if (err) {
+ pr_err("%s(%s): failed to read from user mmap\n",
+ obj->mm.region->name, repr_mmap_type(type));
+ goto out_unmap;
+ }
+
+ if (x != A || y != B) {
+ pr_err("%s(%s): failed to read/write values, found (%lx, %lx)\n",
+ obj->mm.region->name, repr_mmap_type(type),
+ x, y);
+ err = -EINVAL;
+ goto out_unmap;
+ }
+
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+static int igt_mmap_access(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ if (mr->private)
+ continue;
+
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WB);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_UC);
+ if (err == 0)
+ err = __igt_mmap_access(i915, obj, I915_MMAP_TYPE_FIXED);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int __igt_mmap_gpu(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ struct intel_engine_cs *engine;
+ unsigned long addr;
+ u32 __user *ux;
+ u32 bbe;
+ int err;
+ u64 offset;
+
+ /*
+ * Verify that the mmap access into the backing store aligns with
+ * that of the GPU, i.e. that mmap is indeed writing into the same
+ * page as being read by the GPU.
+ */
+
+ if (!can_mmap(obj, type))
+ return 0;
+
+ err = wc_set(obj);
+ if (err == -ENXIO)
+ err = gtt_set(obj);
+ if (err)
+ return err;
+
+ err = __assign_mmap_offset(obj, type, &offset, NULL);
+ if (err)
+ return err;
+
+ addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ ux = u64_to_user_ptr((u64)addr);
+ bbe = MI_BATCH_BUFFER_END;
+ if (put_user(bbe, ux)) {
+ pr_err("%s: Unable to write to mmap\n", obj->mm.region->name);
+ err = -EFAULT;
+ goto out_unmap;
+ }
+
+ if (type == I915_MMAP_TYPE_GTT)
+ intel_gt_flush_ggtt_writes(to_gt(i915));
+
+ for_each_uabi_engine(engine, i915) {
+ struct i915_request *rq;
+ struct i915_vma *vma;
+ struct i915_gem_ww_ctx ww;
+
+ vma = i915_vma_instance(obj, engine->kernel_context->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto out_unmap;
+ }
+
+ i915_gem_ww_ctx_init(&ww, false);
+retry:
+ err = i915_gem_object_lock(obj, &ww);
+ if (!err)
+ err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
+ if (err)
+ goto out_ww;
+
+ rq = i915_request_create(engine->kernel_context);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto out_unpin;
+ }
+
+		err = i915_vma_move_to_active(vma, rq, 0);
+		if (err == 0)
+			err = engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ if (i915_request_wait(rq, 0, HZ / 5) < 0) {
+ struct drm_printer p =
+ drm_info_printer(engine->i915->drm.dev);
+
+ pr_err("%s(%s, %s): Failed to execute batch\n",
+ __func__, engine->name, obj->mm.region->name);
+ intel_engine_dump(engine, &p,
+ "%s\n", engine->name);
+
+ intel_gt_set_wedged(engine->gt);
+ err = -EIO;
+ }
+ i915_request_put(rq);
+
+out_unpin:
+ i915_vma_unpin(vma);
+out_ww:
+ if (err == -EDEADLK) {
+ err = i915_gem_ww_ctx_backoff(&ww);
+ if (!err)
+ goto retry;
+ }
+ i915_gem_ww_ctx_fini(&ww);
+ if (err)
+ goto out_unmap;
+ }
+
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+static int igt_mmap_gpu(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ if (mr->private)
+ continue;
+
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap_gpu(i915, obj, I915_MMAP_TYPE_FIXED);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
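+/*
+ * apply_to_page_range() callbacks used to check whether the CPU PTEs
+ * backing an mmap are populated or have been revoked.
+ */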
+static int check_present_pte(pte_t *pte, unsigned long addr, void *data)
+{
+ pte_t ptent = ptep_get(pte);
+
+ if (!pte_present(ptent) || pte_none(ptent)) {
+ pr_err("missing PTE:%lx\n",
+ (addr - (unsigned long)data) >> PAGE_SHIFT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int check_absent_pte(pte_t *pte, unsigned long addr, void *data)
+{
+ pte_t ptent = ptep_get(pte);
+
+ if (pte_present(ptent) && !pte_none(ptent)) {
+ pr_err("present PTE:%lx; expected to be revoked\n",
+ (addr - (unsigned long)data) >> PAGE_SHIFT);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int check_present(unsigned long addr, unsigned long len)
+{
+ return apply_to_page_range(current->mm, addr, len,
+ check_present_pte, (void *)addr);
+}
+
+static int check_absent(unsigned long addr, unsigned long len)
+{
+ return apply_to_page_range(current->mm, addr, len,
+ check_absent_pte, (void *)addr);
+}
+
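+/*
+ * Touch every page of the mapping so its PTEs are populated before we
+ * inspect them.
+ */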
+static int prefault_range(u64 start, u64 len)
+{
+ const char __user *addr, *end;
+ char __maybe_unused c;
+ int err;
+
+ addr = u64_to_user_ptr(start);
+ end = addr + len;
+
+ for (; addr < end; addr += PAGE_SIZE) {
+ err = __get_user(c, addr);
+ if (err)
+ return err;
+ }
+
+ return __get_user(c, end - 1);
+}
+
+static int __igt_mmap_revoke(struct drm_i915_private *i915,
+ struct drm_i915_gem_object *obj,
+ enum i915_mmap_type type)
+{
+ unsigned long addr;
+ int err;
+ u64 offset;
+
+ if (!can_mmap(obj, type))
+ return 0;
+
+ err = __assign_mmap_offset(obj, type, &offset, NULL);
+ if (err)
+ return err;
+
+ addr = igt_mmap_offset(i915, offset, obj->base.size, PROT_WRITE, MAP_SHARED);
+ if (IS_ERR_VALUE(addr))
+ return addr;
+
+ err = prefault_range(addr, obj->base.size);
+ if (err)
+ goto out_unmap;
+
+ err = check_present(addr, obj->base.size);
+ if (err) {
+ pr_err("%s: was not present\n", obj->mm.region->name);
+ goto out_unmap;
+ }
+
+ /*
+ * After unbinding the object from the GGTT, its address may be reused
+ * for other objects. Ergo we have to revoke the previous mmap PTE
+ * access as it no longer points to the same object.
+ */
+ i915_gem_object_lock(obj, NULL);
+ err = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE);
+ i915_gem_object_unlock(obj);
+ if (err) {
+ pr_err("Failed to unbind object!\n");
+ goto out_unmap;
+ }
+
+ if (type != I915_MMAP_TYPE_GTT) {
+ i915_gem_object_lock(obj, NULL);
+ __i915_gem_object_put_pages(obj);
+ i915_gem_object_unlock(obj);
+ if (i915_gem_object_has_pages(obj)) {
+ pr_err("Failed to put-pages object!\n");
+ err = -EINVAL;
+ goto out_unmap;
+ }
+ }
+
+ err = check_absent(addr, obj->base.size);
+ if (err) {
+ pr_err("%s: was not absent\n", obj->mm.region->name);
+ goto out_unmap;
+ }
+
+out_unmap:
+ vm_munmap(addr, obj->base.size);
+ return err;
+}
+
+static int igt_mmap_revoke(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr;
+ enum intel_region_id id;
+
+ for_each_memory_region(mr, i915, id) {
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ if (mr->private)
+ continue;
+
+ obj = __i915_gem_object_create_user(i915, PAGE_SIZE, &mr, 1);
+ if (obj == ERR_PTR(-ENODEV))
+ continue;
+
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_GTT);
+ if (err == 0)
+ err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_WC);
+ if (err == 0)
+ err = __igt_mmap_revoke(i915, obj, I915_MMAP_TYPE_FIXED);
+
+ i915_gem_object_put(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_partial_tiling),
+ SUBTEST(igt_smoke_tiling),
+ SUBTEST(igt_mmap_offset_exhaustion),
+ SUBTEST(igt_mmap),
+ SUBTEST(igt_mmap_migrate),
+ SUBTEST(igt_mmap_access),
+ SUBTEST(igt_mmap_revoke),
+ SUBTEST(igt_mmap_gpu),
+ };
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
new file mode 100644
index 0000000000..19e374f68f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_object.c
@@ -0,0 +1,99 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "huge_gem_object.h"
+#include "selftests/igt_flush_test.h"
+#include "selftests/mock_gem_device.h"
+
+static int igt_gem_object(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ /* Basic test to ensure we can create an object */
+
+ obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ pr_err("i915_gem_object_create failed, err=%d\n", err);
+ goto out;
+ }
+
+ err = 0;
+ i915_gem_object_put(obj);
+out:
+ return err;
+}
+
+static int igt_gem_huge(void *arg)
+{
+ const unsigned long nreal = 509; /* just to be awkward */
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ unsigned long n;
+ int err;
+
+	/* Basic sanity check of our huge fake object allocation */
+
+ obj = huge_gem_object(i915,
+ nreal * PAGE_SIZE,
+ to_gt(i915)->ggtt->vm.total + PAGE_SIZE);
+ if (IS_ERR(obj))
+ return PTR_ERR(obj);
+
+ err = i915_gem_object_pin_pages_unlocked(obj);
+ if (err) {
+ pr_err("Failed to allocate %lu pages (%lu total), err=%d\n",
+ nreal, obj->base.size / PAGE_SIZE, err);
+ goto out;
+ }
+
+ for (n = 0; n < obj->base.size / PAGE_SIZE; n++) {
+ if (i915_gem_object_get_page(obj, n) !=
+ i915_gem_object_get_page(obj, n % nreal)) {
+ pr_err("Page lookup mismatch at index %lu [%lu]\n",
+ n, n % nreal);
+ err = -EINVAL;
+ goto out_unpin;
+ }
+ }
+
+out_unpin:
+ i915_gem_object_unpin_pages(obj);
+out:
+ i915_gem_object_put(obj);
+ return err;
+}
+
+int i915_gem_object_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_gem_object),
+ };
+ struct drm_i915_private *i915;
+ int err;
+
+ i915 = mock_gem_device();
+ if (!i915)
+ return -ENOMEM;
+
+ err = i915_subtests(tests, i915);
+
+ mock_destroy_device(i915);
+ return err;
+}
+
+int i915_gem_object_live_selftests(struct drm_i915_private *i915)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(igt_gem_huge),
+ };
+
+ return i915_live_subtests(tests, i915);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
new file mode 100644
index 0000000000..d43d8dae0f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_phys.c
@@ -0,0 +1,87 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_selftest.h"
+
+#include "selftests/mock_gem_device.h"
+
+static int mock_phys_object(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct drm_i915_gem_object *obj;
+ int err;
+
+ /* Create an object and bind it to a contiguous set of physical pages,
+ * i.e. exercise the i915_gem_object_phys API.
+ */
+
+ obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
+ if (IS_ERR(obj)) {
+ err = PTR_ERR(obj);
+ pr_err("i915_gem_object_create failed, err=%d\n", err);
+ goto out;
+ }
+
+ i915_gem_object_lock(obj, NULL);
+ if (!i915_gem_object_has_struct_page(obj)) {
+ i915_gem_object_unlock(obj);
+ err = -EINVAL;
+ pr_err("shmem has no struct page\n");
+ goto out_obj;
+ }
+
+ err = i915_gem_object_attach_phys(obj, PAGE_SIZE);
+ i915_gem_object_unlock(obj);
+ if (err) {
+ pr_err("i915_gem_object_attach_phys failed, err=%d\n", err);
+ goto out_obj;
+ }
+
+ if (i915_gem_object_has_struct_page(obj)) {
+ pr_err("i915_gem_object_attach_phys did not create a phys object\n");
+ err = -EINVAL;
+ goto out_obj;
+ }
+
+ if (!atomic_read(&obj->mm.pages_pin_count)) {
+ pr_err("i915_gem_object_attach_phys did not pin its phys pages\n");
+ err = -EINVAL;
+ goto out_obj;
+ }
+
+	/* Make the object dirty so that put_pages must copy the data back */
+ i915_gem_object_lock(obj, NULL);
+ err = i915_gem_object_set_to_gtt_domain(obj, true);
+ i915_gem_object_unlock(obj);
+ if (err) {
+ pr_err("i915_gem_object_set_to_gtt_domain failed with err=%d\n",
+ err);
+ goto out_obj;
+ }
+
+out_obj:
+ i915_gem_object_put(obj);
+out:
+ return err;
+}
+
+int i915_gem_phys_mock_selftests(void)
+{
+ static const struct i915_subtest tests[] = {
+ SUBTEST(mock_phys_object),
+ };
+ struct drm_i915_private *i915;
+ int err;
+
+ i915 = mock_gem_device();
+ if (!i915)
+ return -ENOMEM;
+
+ err = i915_subtests(tests, i915);
+
+ mock_destroy_device(i915);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
new file mode 100644
index 0000000000..20a232a140
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c
@@ -0,0 +1,157 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "igt_gem_utils.h"
+
+#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
+#include "gt/intel_gpu_commands.h"
+#include "gt/intel_gt.h"
+#include "i915_vma.h"
+#include "i915_drv.h"
+
+#include "i915_request.h"
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ struct i915_request *rq;
+
+ /*
+ * Pinning the contexts may generate requests in order to acquire
+ * GGTT space, so do this first before we reserve a seqno for
+ * ourselves.
+ */
+ ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ rq = intel_context_create_request(ce);
+ intel_context_put(ce);
+
+ return rq;
+}
+
+struct i915_vma *
+igt_emit_store_dw(struct i915_vma *vma,
+ u64 offset,
+ unsigned long count,
+ u32 val)
+{
+ struct drm_i915_gem_object *obj;
+ const int ver = GRAPHICS_VER(vma->vm->i915);
+ unsigned long n, size;
+ u32 *cmd;
+ int err;
+
+ size = (4 * count + 1) * sizeof(u32);
+ size = round_up(size, PAGE_SIZE);
+ obj = i915_gem_object_create_internal(vma->vm->i915, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
+
+ cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
+ if (IS_ERR(cmd)) {
+ err = PTR_ERR(cmd);
+ goto err;
+ }
+
+ GEM_BUG_ON(offset + (count - 1) * PAGE_SIZE > i915_vma_size(vma));
+ offset += i915_vma_offset(vma);
+
+ for (n = 0; n < count; n++) {
+ if (ver >= 8) {
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4;
+ *cmd++ = lower_32_bits(offset);
+ *cmd++ = upper_32_bits(offset);
+ *cmd++ = val;
+ } else if (ver >= 4) {
+ *cmd++ = MI_STORE_DWORD_IMM_GEN4 |
+ (ver < 6 ? MI_USE_GGTT : 0);
+ *cmd++ = 0;
+ *cmd++ = offset;
+ *cmd++ = val;
+ } else {
+ *cmd++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+ *cmd++ = offset;
+ *cmd++ = val;
+ }
+ offset += PAGE_SIZE;
+ }
+ *cmd = MI_BATCH_BUFFER_END;
+
+ i915_gem_object_flush_map(obj);
+ i915_gem_object_unpin_map(obj);
+
+ intel_gt_chipset_flush(vma->vm->gt);
+
+ vma = i915_vma_instance(obj, vma->vm, NULL);
+ if (IS_ERR(vma)) {
+ err = PTR_ERR(vma);
+ goto err;
+ }
+
+ err = i915_vma_pin(vma, 0, 0, PIN_USER);
+ if (err)
+ goto err;
+
+ return vma;
+
+err:
+ i915_gem_object_put(obj);
+ return ERR_PTR(err);
+}
+
+int igt_gpu_fill_dw(struct intel_context *ce,
+ struct i915_vma *vma, u64 offset,
+ unsigned long count, u32 val)
+{
+ struct i915_request *rq;
+ struct i915_vma *batch;
+ unsigned int flags;
+ int err;
+
+ GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
+ GEM_BUG_ON(!i915_vma_is_pinned(vma));
+
+ batch = igt_emit_store_dw(vma, offset, count, val);
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ err = PTR_ERR(rq);
+ goto err_batch;
+ }
+
+ err = igt_vma_move_to_active_unlocked(batch, rq, 0);
+ if (err)
+ goto skip_request;
+
+ err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE);
+ if (err)
+ goto skip_request;
+
+ flags = 0;
+ if (GRAPHICS_VER(ce->vm->i915) <= 5)
+ flags |= I915_DISPATCH_SECURE;
+
+ err = rq->engine->emit_bb_start(rq,
+ i915_vma_offset(batch),
+ i915_vma_size(batch),
+ flags);
+
+skip_request:
+ if (err)
+ i915_request_set_error_once(rq, err);
+ i915_request_add(rq);
+err_batch:
+ i915_vma_unpin_and_release(&batch, 0);
+ return err;
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
new file mode 100644
index 0000000000..71a3ca8a88
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.h
@@ -0,0 +1,46 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#ifndef __IGT_GEM_UTILS_H__
+#define __IGT_GEM_UTILS_H__
+
+#include <linux/types.h>
+
+#include "i915_vma.h"
+
+struct i915_request;
+struct i915_gem_context;
+struct i915_vma;
+
+struct intel_context;
+struct intel_engine_cs;
+
+struct i915_request *
+igt_request_alloc(struct i915_gem_context *ctx, struct intel_engine_cs *engine);
+
+struct i915_vma *
+igt_emit_store_dw(struct i915_vma *vma,
+ u64 offset,
+ unsigned long count,
+ u32 val);
+
+int igt_gpu_fill_dw(struct intel_context *ce,
+ struct i915_vma *vma, u64 offset,
+ unsigned long count, u32 val);
+
+static inline int __must_check
+igt_vma_move_to_active_unlocked(struct i915_vma *vma, struct i915_request *rq,
+ unsigned int flags)
+{
+ int err;
+
+ i915_vma_lock(vma);
+ err = i915_vma_move_to_active(vma, rq, flags);
+ i915_vma_unlock(vma);
+ return err;
+}
+
+#endif /* __IGT_GEM_UTILS_H__ */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.c b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
new file mode 100644
index 0000000000..8ac6726ec1
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.c
@@ -0,0 +1,180 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "i915_file_private.h"
+#include "mock_context.h"
+#include "selftests/mock_drm.h"
+#include "selftests/mock_gtt.h"
+
+struct i915_gem_context *
+mock_context(struct drm_i915_private *i915,
+ const char *name)
+{
+ struct i915_gem_context *ctx;
+ struct i915_gem_engines *e;
+ struct intel_sseu null_sseu = {};
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
+
+ kref_init(&ctx->ref);
+ INIT_LIST_HEAD(&ctx->link);
+ ctx->i915 = i915;
+ INIT_WORK(&ctx->release_work, i915_gem_context_release_work);
+
+ mutex_init(&ctx->mutex);
+
+ spin_lock_init(&ctx->stale.lock);
+ INIT_LIST_HEAD(&ctx->stale.engines);
+
+ i915_gem_context_set_persistence(ctx);
+
+ if (name) {
+ struct i915_ppgtt *ppgtt;
+
+ strncpy(ctx->name, name, sizeof(ctx->name) - 1);
+
+ ppgtt = mock_ppgtt(i915, name);
+ if (!ppgtt)
+ goto err_free;
+
+ ctx->vm = &ppgtt->vm;
+ }
+
+ mutex_init(&ctx->engines_mutex);
+ e = default_engines(ctx, null_sseu);
+ if (IS_ERR(e))
+ goto err_vm;
+ RCU_INIT_POINTER(ctx->engines, e);
+
+ INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
+ mutex_init(&ctx->lut_mutex);
+
+ return ctx;
+
+err_vm:
+ if (ctx->vm)
+ i915_vm_put(ctx->vm);
+err_free:
+ kfree(ctx);
+ return NULL;
+}
+
+void mock_context_close(struct i915_gem_context *ctx)
+{
+ context_close(ctx);
+}
+
+void mock_init_contexts(struct drm_i915_private *i915)
+{
+ init_contexts(&i915->gem.contexts);
+}
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct file *file)
+{
+ struct drm_i915_file_private *fpriv = to_drm_file(file)->driver_priv;
+ struct i915_gem_proto_context *pc;
+ struct i915_gem_context *ctx;
+ int err;
+ u32 id;
+
+ pc = proto_context_create(i915, 0);
+ if (IS_ERR(pc))
+ return ERR_CAST(pc);
+
+ ctx = i915_gem_create_context(i915, pc);
+ proto_context_close(i915, pc);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ i915_gem_context_set_no_error_capture(ctx);
+
+ err = xa_alloc(&fpriv->context_xa, &id, NULL, xa_limit_32b, GFP_KERNEL);
+ if (err < 0)
+ goto err_ctx;
+
+ gem_context_register(ctx, fpriv, id);
+
+ return ctx;
+
+err_ctx:
+ context_close(ctx);
+ return ERR_PTR(err);
+}
+
+struct i915_gem_context *
+live_context_for_engine(struct intel_engine_cs *engine, struct file *file)
+{
+ struct i915_gem_engines *engines;
+ struct i915_gem_context *ctx;
+ struct intel_sseu null_sseu = {};
+ struct intel_context *ce;
+
+ engines = alloc_engines(1);
+ if (!engines)
+ return ERR_PTR(-ENOMEM);
+
+ ctx = live_context(engine->i915, file);
+ if (IS_ERR(ctx)) {
+ __free_engines(engines, 0);
+ return ctx;
+ }
+
+ ce = intel_context_create(engine);
+ if (IS_ERR(ce)) {
+ __free_engines(engines, 0);
+ return ERR_CAST(ce);
+ }
+
+ intel_context_set_gem(ce, ctx, null_sseu);
+ engines->engines[0] = ce;
+ engines->num_engines = 1;
+
+ mutex_lock(&ctx->engines_mutex);
+ i915_gem_context_set_user_engines(ctx);
+ engines = rcu_replace_pointer(ctx->engines, engines, 1);
+ mutex_unlock(&ctx->engines_mutex);
+
+ engines_idle_release(ctx, engines);
+
+ return ctx;
+}
+
+struct i915_gem_context *
+kernel_context(struct drm_i915_private *i915,
+ struct i915_address_space *vm)
+{
+ struct i915_gem_context *ctx;
+ struct i915_gem_proto_context *pc;
+
+ pc = proto_context_create(i915, 0);
+ if (IS_ERR(pc))
+ return ERR_CAST(pc);
+
+ if (vm) {
+ if (pc->vm)
+ i915_vm_put(pc->vm);
+ pc->vm = i915_vm_get(vm);
+ }
+
+ ctx = i915_gem_create_context(i915, pc);
+ proto_context_close(i915, pc);
+ if (IS_ERR(ctx))
+ return ctx;
+
+ i915_gem_context_clear_bannable(ctx);
+ i915_gem_context_set_persistence(ctx);
+ i915_gem_context_set_no_error_capture(ctx);
+
+ return ctx;
+}
+
+void kernel_context_close(struct i915_gem_context *ctx)
+{
+ context_close(ctx);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_context.h b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
new file mode 100644
index 0000000000..7a02fd9b58
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_context.h
@@ -0,0 +1,33 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_CONTEXT_H
+#define __MOCK_CONTEXT_H
+
+struct file;
+struct drm_i915_private;
+struct intel_engine_cs;
+struct i915_address_space;
+
+void mock_init_contexts(struct drm_i915_private *i915);
+
+struct i915_gem_context *
+mock_context(struct drm_i915_private *i915,
+ const char *name);
+
+void mock_context_close(struct i915_gem_context *ctx);
+
+struct i915_gem_context *
+live_context(struct drm_i915_private *i915, struct file *file);
+
+struct i915_gem_context *
+live_context_for_engine(struct intel_engine_cs *engine, struct file *file);
+
+struct i915_gem_context *kernel_context(struct drm_i915_private *i915,
+ struct i915_address_space *vm);
+void kernel_context_close(struct i915_gem_context *ctx);
+
+#endif /* !__MOCK_CONTEXT_H */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
new file mode 100644
index 0000000000..b2a5882b8f
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c
@@ -0,0 +1,133 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#include "mock_dmabuf.h"
+
+static struct sg_table *mock_map_dma_buf(struct dma_buf_attachment *attachment,
+ enum dma_data_direction dir)
+{
+ struct mock_dmabuf *mock = to_mock(attachment->dmabuf);
+ struct sg_table *st;
+ struct scatterlist *sg;
+ int i, err;
+
+ st = kmalloc(sizeof(*st), GFP_KERNEL);
+ if (!st)
+ return ERR_PTR(-ENOMEM);
+
+ err = sg_alloc_table(st, mock->npages, GFP_KERNEL);
+ if (err)
+ goto err_free;
+
+ sg = st->sgl;
+ for (i = 0; i < mock->npages; i++) {
+ sg_set_page(sg, mock->pages[i], PAGE_SIZE, 0);
+ sg = sg_next(sg);
+ }
+
+ err = dma_map_sgtable(attachment->dev, st, dir, 0);
+ if (err)
+ goto err_st;
+
+ return st;
+
+err_st:
+ sg_free_table(st);
+err_free:
+ kfree(st);
+ return ERR_PTR(err);
+}
+
+static void mock_unmap_dma_buf(struct dma_buf_attachment *attachment,
+ struct sg_table *st,
+ enum dma_data_direction dir)
+{
+ dma_unmap_sgtable(attachment->dev, st, dir, 0);
+ sg_free_table(st);
+ kfree(st);
+}
+
+static void mock_dmabuf_release(struct dma_buf *dma_buf)
+{
+ struct mock_dmabuf *mock = to_mock(dma_buf);
+ int i;
+
+ for (i = 0; i < mock->npages; i++)
+ put_page(mock->pages[i]);
+
+ kfree(mock);
+}
+
+static int mock_dmabuf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct mock_dmabuf *mock = to_mock(dma_buf);
+ void *vaddr;
+
+ vaddr = vm_map_ram(mock->pages, mock->npages, 0);
+ if (!vaddr)
+ return -ENOMEM;
+ iosys_map_set_vaddr(map, vaddr);
+
+ return 0;
+}
+
+static void mock_dmabuf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+ struct mock_dmabuf *mock = to_mock(dma_buf);
+
+ vm_unmap_ram(map->vaddr, mock->npages);
+}
+
+static int mock_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
+{
+ return -ENODEV;
+}
+
+static const struct dma_buf_ops mock_dmabuf_ops = {
+ .map_dma_buf = mock_map_dma_buf,
+ .unmap_dma_buf = mock_unmap_dma_buf,
+ .release = mock_dmabuf_release,
+ .mmap = mock_dmabuf_mmap,
+ .vmap = mock_dmabuf_vmap,
+ .vunmap = mock_dmabuf_vunmap,
+};
+
+static struct dma_buf *mock_dmabuf(int npages)
+{
+ struct mock_dmabuf *mock;
+ DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
+ struct dma_buf *dmabuf;
+ int i;
+
+ mock = kmalloc(sizeof(*mock) + npages * sizeof(struct page *),
+ GFP_KERNEL);
+ if (!mock)
+ return ERR_PTR(-ENOMEM);
+
+ mock->npages = npages;
+ for (i = 0; i < npages; i++) {
+ mock->pages[i] = alloc_page(GFP_KERNEL);
+ if (!mock->pages[i])
+ goto err;
+ }
+
+ exp_info.ops = &mock_dmabuf_ops;
+ exp_info.size = npages * PAGE_SIZE;
+ exp_info.flags = O_CLOEXEC;
+ exp_info.priv = mock;
+
+ dmabuf = dma_buf_export(&exp_info);
+ if (IS_ERR(dmabuf))
+ goto err;
+
+ return dmabuf;
+
+err:
+ while (i--)
+ put_page(mock->pages[i]);
+ kfree(mock);
+ return ERR_PTR(-ENOMEM);
+}
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
new file mode 100644
index 0000000000..22818bbb13
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.h
@@ -0,0 +1,22 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_DMABUF_H__
+#define __MOCK_DMABUF_H__
+
+#include <linux/dma-buf.h>
+
+struct mock_dmabuf {
+ int npages;
+ struct page *pages[];
+};
+
+static inline struct mock_dmabuf *to_mock(struct dma_buf *buf)
+{
+ return buf->priv;
+}
+
+#endif /* !__MOCK_DMABUF_H__ */
diff --git a/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
new file mode 100644
index 0000000000..688511afa8
--- /dev/null
+++ b/drivers/gpu/drm/i915/gem/selftests/mock_gem_object.h
@@ -0,0 +1,16 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2016 Intel Corporation
+ */
+
+#ifndef __MOCK_GEM_OBJECT_H__
+#define __MOCK_GEM_OBJECT_H__
+
+#include "gem/i915_gem_object_types.h"
+
+struct mock_object {
+ struct drm_i915_gem_object base;
+};
+
+#endif /* !__MOCK_GEM_OBJECT_H__ */