Diffstat (limited to 'gfx/layers/mlgpu')
42 files changed, 8159 insertions, 0 deletions
diff --git a/gfx/layers/mlgpu/BufferCache.cpp b/gfx/layers/mlgpu/BufferCache.cpp new file mode 100644 index 0000000000..8a2668a5ef --- /dev/null +++ b/gfx/layers/mlgpu/BufferCache.cpp @@ -0,0 +1,96 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "BufferCache.h" +#include "MLGDevice.h" +#include "ShaderDefinitionsMLGPU.h" +#include "mozilla/MathAlgorithms.h" + +namespace mozilla { +namespace layers { + +using namespace mlg; + +BufferCache::BufferCache(MLGDevice* aDevice) + : mDevice(aDevice), + mFirstSizeClass(CeilingLog2(kConstantBufferElementSize)), + mFrameNumber(0), + mNextSizeClassToShrink(0) { + // Create a cache of buffers for each size class, where each size class is a + // power of 2 between the minimum and maximum size of a constant buffer. + size_t maxBindSize = mDevice->GetMaxConstantBufferBindSize(); + MOZ_ASSERT(IsPowerOfTwo(maxBindSize)); + + size_t lastSizeClass = CeilingLog2(maxBindSize); + MOZ_ASSERT(lastSizeClass >= mFirstSizeClass); + + mCaches.resize(lastSizeClass - mFirstSizeClass + 1); +} + +BufferCache::~BufferCache() = default; + +RefPtr<MLGBuffer> BufferCache::GetOrCreateBuffer(size_t aBytes) { + size_t sizeClass = CeilingLog2(aBytes); + size_t sizeClassIndex = sizeClass - mFirstSizeClass; + if (sizeClassIndex >= mCaches.size()) { + return mDevice->CreateBuffer(MLGBufferType::Constant, aBytes, + MLGUsage::Dynamic, nullptr); + } + + CachePool& pool = mCaches[sizeClassIndex]; + + // See if we've cached a buffer that wasn't used in the past 2 frames. A + // buffer used this frame could have already been mapped and written to, and a + // buffer used the previous frame might still be in-use by the GPU. While the + // latter case is okay, it causes aliasing in the driver. Since content is + // double buffered we do not let the compositor get more than 1 frames ahead, + // and a count of 2 frames should ensure the buffer is unused. + if (!pool.empty() && mFrameNumber >= pool.front().mLastUsedFrame + 2) { + RefPtr<MLGBuffer> buffer = pool.front().mBuffer; + pool.pop_front(); + pool.push_back(CacheEntry(mFrameNumber, buffer)); + MOZ_RELEASE_ASSERT(buffer->GetSize() >= aBytes); + return buffer; + } + + // Allocate a new buffer and cache it. + size_t bytes = (size_t(1) << sizeClass); + MOZ_ASSERT(bytes >= aBytes); + + RefPtr<MLGBuffer> buffer = mDevice->CreateBuffer( + MLGBufferType::Constant, bytes, MLGUsage::Dynamic, nullptr); + if (!buffer) { + return nullptr; + } + + pool.push_back(CacheEntry(mFrameNumber, buffer)); + return buffer; +} + +void BufferCache::EndFrame() { + // Consider a buffer dead after ~5 seconds assuming 60 fps. + static size_t kMaxUnusedFrameCount = 60 * 5; + + // At the end of each frame we pick one size class and see if it has any + // buffers that haven't been used for many frames. If so we clear them. + // The next frame we'll search the next size class. (This is just to spread + // work over more than one frame.) + CachePool& pool = mCaches[mNextSizeClassToShrink]; + while (!pool.empty()) { + // Since the deque is sorted oldest-to-newest, front-to-back, we can stop + // searching as soon as a buffer is active. 
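// [Editorial note - illustrative worked example, not part of this patch.]
// Putting the two policies in this file together, with kMaxUnusedFrameCount
// == 60 * 5 == 300 and the "+ 2" reuse rule in GetOrCreateBuffer(): suppose
// mFrameNumber == 400, then a pool entry with
//   mLastUsedFrame == 399  -> too fresh to reuse (400 < 399 + 2), kept;
//   mLastUsedFrame == 398  -> eligible for reuse (400 >= 398 + 2);
//   mLastUsedFrame == 101  -> idle for 299 frames, still kept;
//   mLastUsedFrame == 100  -> idle for 300 frames, evicted by the loop below.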
+ if (mFrameNumber - pool.front().mLastUsedFrame < kMaxUnusedFrameCount) { + break; + } + pool.pop_front(); + } + mNextSizeClassToShrink = (mNextSizeClassToShrink + 1) % mCaches.size(); + + mFrameNumber++; +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/BufferCache.h b/gfx/layers/mlgpu/BufferCache.h new file mode 100644 index 0000000000..0f67597e3a --- /dev/null +++ b/gfx/layers/mlgpu/BufferCache.h @@ -0,0 +1,82 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_BufferCache_h +#define mozilla_gfx_layers_mlgpu_BufferCache_h + +#include "mozilla/EnumeratedArray.h" +#include "mozilla/RefPtr.h" +#include <deque> +#include <vector> + +namespace mozilla { +namespace layers { + +class MLGBuffer; +class MLGDevice; + +// Cache MLGBuffers based on how long ago they were last used. +class BufferCache final { + public: + explicit BufferCache(MLGDevice* aDevice); + ~BufferCache(); + + // Get a buffer that has at least |aBytes|, or create a new one + // if none can be re-used. + RefPtr<MLGBuffer> GetOrCreateBuffer(size_t aBytes); + + // Age out old buffers after a frame has been completed. + void EndFrame(); + + private: + // Not RefPtr since this would create a cycle. + MLGDevice* mDevice; + + // The first size class is Log2(N), where 16 is the minimum size of a + // constant buffer (currently 16 bytes). + size_t mFirstSizeClass; + + // Each size class is a power of 2. Each pool of buffers is represented as a + // deque, with the least-recently-used (i.e., oldest) buffers at the front, + // and most-recently-used (i.e., newest) buffers at the back. To re-use a + // buffer it is popped off the front and re-added to the back. + // + // This is not always efficient use of storage: if a single frame allocates + // 300 buffers of the same size, we may keep recycling through all those + // buffers for a long time, as long as at least one gets used per frame. + // But since buffers use tiny amounts of memory, and they are only mapped + // while drawing, it shouldn't be a big deal. + struct CacheEntry { + CacheEntry() : mLastUsedFrame(0) {} + // XXX The copy constructor can be deleted once RefPtr's move constructor is + // declared noexcept, see Bug 1612680. + CacheEntry(const CacheEntry& aEntry) = default; + CacheEntry(CacheEntry&& aEntry) = default; + CacheEntry(size_t aLastUsedFrame, MLGBuffer* aBuffer) + : mLastUsedFrame(aLastUsedFrame), mBuffer(aBuffer) {} + + uint64_t mLastUsedFrame; + RefPtr<MLGBuffer> mBuffer; + }; + typedef std::deque<CacheEntry> CachePool; + + // We track how many frames have occurred to determine the age of cache + // entries. + uint64_t mFrameNumber; + + // To avoid doing too much work in one frame, we only shrink one size class + // per frame. + uint64_t mNextSizeClassToShrink; + + // There is one pool of buffers for each power of 2 allocation size. The + // maximum buffer size is at most 64KB on Direct3D 11. 
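// [Editorial note - illustrative worked example, not part of this patch.]
// With the 16-byte minimum element size noted above and the 64KB Direct3D 11
// maximum, the size classes are 2^4 .. 2^16, so mCaches holds 13 pools.
// A sketch of the index math used in GetOrCreateBuffer():
//   CeilingLog2(100) == 7            // a 100-byte request -> 128-byte class
//   sizeClassIndex   == 7 - 4 == 3   // served from mCaches[3]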
+ std::vector<CachePool> mCaches; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_BufferCache_h diff --git a/gfx/layers/mlgpu/CanvasLayerMLGPU.cpp b/gfx/layers/mlgpu/CanvasLayerMLGPU.cpp new file mode 100644 index 0000000000..3e47709bc7 --- /dev/null +++ b/gfx/layers/mlgpu/CanvasLayerMLGPU.cpp @@ -0,0 +1,81 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "CanvasLayerMLGPU.h" +#include "composite/CompositableHost.h" // for CompositableHost +#include "gfx2DGlue.h" // for ToFilter +#include "gfxEnv.h" // for gfxEnv, etc +#include "mozilla/gfx/Matrix.h" // for Matrix4x4 +#include "mozilla/gfx/Point.h" // for Point +#include "mozilla/gfx/Rect.h" // for Rect +#include "mozilla/layers/Compositor.h" // for Compositor +#include "mozilla/layers/Effects.h" // for EffectChain +#include "mozilla/layers/ImageHost.h" +#include "mozilla/mozalloc.h" // for operator delete +#include "nsAString.h" +#include "mozilla/RefPtr.h" // for nsRefPtr +#include "MaskOperation.h" +#include "nsISupportsImpl.h" // for MOZ_COUNT_CTOR, etc +#include "nsString.h" // for nsAutoCString + +namespace mozilla { +namespace layers { + +using namespace mozilla::gfx; + +CanvasLayerMLGPU::CanvasLayerMLGPU(LayerManagerMLGPU* aManager) + : CanvasLayer(aManager, nullptr), TexturedLayerMLGPU(aManager) {} + +CanvasLayerMLGPU::~CanvasLayerMLGPU() { CleanupResources(); } + +Layer* CanvasLayerMLGPU::GetLayer() { return this; } + +gfx::SamplingFilter CanvasLayerMLGPU::GetSamplingFilter() { + gfx::SamplingFilter filter = mSamplingFilter; +#ifdef ANDROID + // Bug 691354 + // Using the LINEAR filter we get unexplained artifacts. + // Use NEAREST when no scaling is required. + Matrix matrix; + bool is2D = GetEffectiveTransform().Is2D(&matrix); + if (is2D && !ThebesMatrix(matrix).HasNonTranslationOrFlip()) { + filter = SamplingFilter::POINT; + } +#endif + return filter; +} + +void CanvasLayerMLGPU::PrintInfo(std::stringstream& aStream, + const char* aPrefix) { + CanvasLayer::PrintInfo(aStream, aPrefix); + aStream << "\n"; + if (mHost && mHost->IsAttached()) { + nsAutoCString pfx(aPrefix); + pfx += " "; + mHost->PrintInfo(aStream, pfx.get()); + } +} + +void CanvasLayerMLGPU::CleanupResources() { + if (mHost) { + mHost->Detach(this); + } + mTexture = nullptr; + mBigImageTexture = nullptr; + mHost = nullptr; +} + +void CanvasLayerMLGPU::Disconnect() { CleanupResources(); } + +void CanvasLayerMLGPU::ClearCachedResources() { CleanupResources(); } + +void CanvasLayerMLGPU::SetRenderRegion(LayerIntRegion&& aRegion) { + aRegion.AndWith(LayerIntRect::FromUnknownRect(mPictureRect)); + LayerMLGPU::SetRenderRegion(std::move(aRegion)); +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/CanvasLayerMLGPU.h b/gfx/layers/mlgpu/CanvasLayerMLGPU.h new file mode 100644 index 0000000000..d7c740a40d --- /dev/null +++ b/gfx/layers/mlgpu/CanvasLayerMLGPU.h @@ -0,0 +1,57 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef GFX_CanvasLayerMLGPU_H +#define GFX_CanvasLayerMLGPU_H + +#include "Layers.h" // for CanvasLayer, etc +#include "TexturedLayerMLGPU.h" +#include "mozilla/Attributes.h" // for override +#include "mozilla/RefPtr.h" // for RefPtr +#include "mozilla/layers/LayerManagerMLGPU.h" // for LayerComposite, etc +#include "mozilla/layers/LayersTypes.h" // for LayerRenderState, etc +#include "nsRect.h" // for mozilla::gfx::IntRect +#include "nscore.h" // for nsACString + +namespace mozilla { +namespace layers { + +class CompositableHost; +class ImageHost; + +class CanvasLayerMLGPU final : public CanvasLayer, public TexturedLayerMLGPU { + public: + explicit CanvasLayerMLGPU(LayerManagerMLGPU* aManager); + + protected: + virtual ~CanvasLayerMLGPU(); + + public: + Layer* GetLayer() override; + void Disconnect() override; + + HostLayer* AsHostLayer() override { return this; } + CanvasLayerMLGPU* AsCanvasLayerMLGPU() override { return this; } + gfx::SamplingFilter GetSamplingFilter() override; + void ClearCachedResources() override; + void SetRenderRegion(LayerIntRegion&& aRegion) override; + + MOZ_LAYER_DECL_NAME("CanvasLayerMLGPU", TYPE_CANVAS) + + protected: + RefPtr<CanvasRenderer> CreateCanvasRendererInternal() override { + MOZ_CRASH("Incompatible surface type"); + return nullptr; + } + + void PrintInfo(std::stringstream& aStream, const char* aPrefix) override; + void CleanupResources(); +}; + +} // namespace layers +} // namespace mozilla + +#endif /* GFX_CanvasLayerMLGPU_H */ diff --git a/gfx/layers/mlgpu/ClearRegionHelper.h b/gfx/layers/mlgpu/ClearRegionHelper.h new file mode 100644 index 0000000000..910972c9c1 --- /dev/null +++ b/gfx/layers/mlgpu/ClearRegionHelper.h @@ -0,0 +1,30 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_ClearRegionHelper_h +#define mozilla_gfx_layers_mlgpu_ClearRegionHelper_h + +#include "SharedBufferMLGPU.h" + +namespace mozilla { +namespace layers { + +// This is a helper class for issuing a clear operation based on either +// a shader or an API like ClearView. It also works when the depth +// buffer is enabled. +struct ClearRegionHelper { + // If using ClearView-based clears, we fill mRegions. + nsTArray<gfx::IntRect> mRects; + + // If using shader-based clears, we fill these buffers. + VertexBufferSection mInput; + ConstantBufferSection mVSBuffer; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_ClearRegionHelper_h diff --git a/gfx/layers/mlgpu/ContainerLayerMLGPU.cpp b/gfx/layers/mlgpu/ContainerLayerMLGPU.cpp new file mode 100644 index 0000000000..e4fc8f737e --- /dev/null +++ b/gfx/layers/mlgpu/ContainerLayerMLGPU.cpp @@ -0,0 +1,242 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ContainerLayerMLGPU.h" +#include "mozilla/StaticPrefs_layers.h" +#include "LayerManagerMLGPU.h" +#include "MLGDevice.h" +#include "mozilla/gfx/Rect.h" +#include "mozilla/gfx/Types.h" +#include "UnitTransforms.h" +#include "UtilityMLGPU.h" + +namespace mozilla { +namespace layers { + +using namespace gfx; + +ContainerLayerMLGPU::ContainerLayerMLGPU(LayerManagerMLGPU* aManager) + : ContainerLayer(aManager, nullptr), + LayerMLGPU(aManager), + mInvalidateEntireSurface(false), + mSurfaceCopyNeeded(false), + mView(nullptr) {} + +ContainerLayerMLGPU::~ContainerLayerMLGPU() { + while (mFirstChild) { + RemoveChild(mFirstChild); + } +} + +bool ContainerLayerMLGPU::OnPrepareToRender(FrameBuilder* aBuilder) { + mView = nullptr; + + if (!UseIntermediateSurface()) { + // Set this so we invalidate the entire cached render target (if any) + // if our container uses an intermediate surface again later. + mInvalidateEntireSurface = true; + return true; + } + + mChildrenChanged = false; + + mTargetOffset = GetIntermediateSurfaceRect().TopLeft().ToUnknownPoint(); + mTargetSize = GetIntermediateSurfaceRect().Size().ToUnknownSize(); + + if (mRenderTarget && mRenderTarget->GetSize() != mTargetSize) { + mRenderTarget = nullptr; + } + + // Note that if a surface copy is needed, we always redraw the + // whole surface (on-demand). This is a rare case - the old + // Compositor already does this - and it saves us having to + // do much more complicated invalidation. + bool surfaceCopyNeeded = false; + DefaultComputeSupportsComponentAlphaChildren(&surfaceCopyNeeded); + if (surfaceCopyNeeded != mSurfaceCopyNeeded || surfaceCopyNeeded) { + mInvalidateEntireSurface = true; + } + mSurfaceCopyNeeded = surfaceCopyNeeded; + + gfx::IntRect viewport(gfx::IntPoint(0, 0), mTargetSize); + if (!mRenderTarget || !StaticPrefs::layers_mlgpu_enable_invalidation() || + mInvalidateEntireSurface) { + // Fine-grained invalidation is disabled, invalidate everything. + mInvalidRect = viewport; + } else { + // Clamp the invalid rect to the viewport. + mInvalidRect -= mTargetOffset; + mInvalidRect = mInvalidRect.Intersect(viewport); + } + + mInvalidateEntireSurface = false; + return true; +} + +static IntRect GetTransformedBounds(Layer* aLayer) { + IntRect bounds = aLayer->GetLocalVisibleRegion().GetBounds().ToUnknownRect(); + if (bounds.IsEmpty()) { + return bounds; + } + + const Matrix4x4& transform = aLayer->GetEffectiveTransform(); + Rect rect = + transform.TransformAndClipBounds(Rect(bounds), Rect::MaxIntRect()); + rect.RoundOut(); + rect.ToIntRect(&bounds); + return bounds; +} + +/* static */ +Maybe<IntRect> ContainerLayerMLGPU::FindVisibleBounds( + Layer* aLayer, const Maybe<RenderTargetIntRect>& aClip) { + AL_LOG(" visiting child %p\n", aLayer); + AL_LOG_IF(aClip, " parent clip: %s\n", Stringify(aClip.value()).c_str()); + + ContainerLayer* container = aLayer->AsContainerLayer(); + if (container) { + if (container->UseIntermediateSurface()) { + ContainerLayerMLGPU* c = + container->AsHostLayer()->AsLayerMLGPU()->AsContainerLayerMLGPU(); + if (!c) { + gfxCriticalError() + << "not container: " + << container->AsHostLayer()->AsLayerMLGPU()->GetType(); + } + MOZ_RELEASE_ASSERT(c); + c->ComputeIntermediateSurfaceBounds(); + } else { + Maybe<IntRect> accumulated = Some(IntRect()); + + // Traverse children. 
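// [Editorial note, not part of this patch.] The loop below starts from an
// empty rect and unions in each child's clipped bounds. SafeUnion() yields
// Nothing() if the union would overflow the integer coordinate space, and
// that Nothing() propagates out of FindVisibleBounds() so
// ComputeIntermediateSurfaceBounds() bails out and leaves the existing
// shadow visible region untouched.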
+ for (Layer* child = container->GetFirstChild(); child; + child = child->GetNextSibling()) { + Maybe<RenderTargetIntRect> clip = aClip; + if (const Maybe<ParentLayerIntRect>& childClip = + child->AsHostLayer()->GetShadowClipRect()) { + RenderTargetIntRect rtChildClip = TransformBy( + ViewAs<ParentLayerToRenderTargetMatrix4x4>( + aLayer->GetEffectiveTransform(), + PixelCastJustification::RenderTargetIsParentLayerForRoot), + childClip.value()); + clip = IntersectMaybeRects(clip, Some(rtChildClip)); + AL_LOG(" target clip: %s\n", Stringify(rtChildClip).c_str()); + AL_LOG_IF(clip, " full clip: %s\n", + Stringify(clip.value()).c_str()); + } + + Maybe<IntRect> childBounds = FindVisibleBounds(child, clip); + if (!childBounds) { + return Nothing(); + } + + accumulated = accumulated->SafeUnion(childBounds.value()); + if (!accumulated) { + return Nothing(); + } + } + return accumulated; + } + } + + IntRect bounds = GetTransformedBounds(aLayer); + AL_LOG(" layer bounds: %s\n", Stringify(bounds).c_str()); + + if (aClip) { + bounds = bounds.Intersect(aClip.value().ToUnknownRect()); + AL_LOG(" clipped bounds: %s\n", Stringify(bounds).c_str()); + } + return Some(bounds); +} + +void ContainerLayerMLGPU::ComputeIntermediateSurfaceBounds() { + Maybe<IntRect> bounds = Some(IntRect()); + for (Layer* child = GetFirstChild(); child; child = child->GetNextSibling()) { + Maybe<RenderTargetIntRect> clip = ViewAs<RenderTargetPixel>( + child->AsHostLayer()->GetShadowClipRect(), + PixelCastJustification::RenderTargetIsParentLayerForRoot); + Maybe<IntRect> childBounds = FindVisibleBounds(child, clip); + if (!childBounds) { + return; + } + + bounds = bounds->SafeUnion(childBounds.value()); + if (!bounds) { + return; + } + } + + SetShadowVisibleRegion(LayerIntRect::FromUnknownRect(bounds.value())); +} + +void ContainerLayerMLGPU::OnLayerManagerChange(LayerManagerMLGPU* aManager) { + ClearCachedResources(); +} + +RefPtr<MLGRenderTarget> ContainerLayerMLGPU::UpdateRenderTarget( + MLGDevice* aDevice, MLGRenderTargetFlags aFlags) { + if (mRenderTarget) { + return mRenderTarget; + } + + mRenderTarget = aDevice->CreateRenderTarget(mTargetSize, aFlags); + if (!mRenderTarget) { + gfxWarning() + << "Failed to create an intermediate render target for ContainerLayer"; + return nullptr; + } + + return mRenderTarget; +} + +void ContainerLayerMLGPU::SetInvalidCompositeRect(const gfx::IntRect* aRect) { + // For simplicity we only track the bounds of the invalid area, since regions + // are expensive. + // + // Note we add the bounds to the invalid rect from the last frame, since we + // only clear the area that we actually paint. If this overflows we use the + // last render target size, since if that changes we'll invalidate everything + // anyway. + if (aRect) { + if (Maybe<gfx::IntRect> result = mInvalidRect.SafeUnion(*aRect)) { + mInvalidRect = result.value(); + } else { + mInvalidateEntireSurface = true; + } + } else { + mInvalidateEntireSurface = true; + } +} + +void ContainerLayerMLGPU::ClearCachedResources() { mRenderTarget = nullptr; } + +bool ContainerLayerMLGPU::IsContentOpaque() { + if (GetMixBlendMode() != gfx::CompositionOp::OP_OVER) { + // We need to read from what's underneath us, so we consider our content to + // be not opaque. 
+ return false; + } + return LayerMLGPU::IsContentOpaque(); +} + +const LayerIntRegion& ContainerLayerMLGPU::GetShadowVisibleRegion() { + if (!UseIntermediateSurface()) { + RecomputeShadowVisibleRegionFromChildren(); + } + + return mShadowVisibleRegion; +} + +const LayerIntRegion& RefLayerMLGPU::GetShadowVisibleRegion() { + if (!UseIntermediateSurface()) { + RecomputeShadowVisibleRegionFromChildren(); + } + + return mShadowVisibleRegion; +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/ContainerLayerMLGPU.h b/gfx/layers/mlgpu/ContainerLayerMLGPU.h new file mode 100644 index 0000000000..733bd8477f --- /dev/null +++ b/gfx/layers/mlgpu/ContainerLayerMLGPU.h @@ -0,0 +1,96 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_ContainerLayerMLGPU_h +#define mozilla_gfx_layers_mlgpu_ContainerLayerMLGPU_h + +#include "LayerMLGPU.h" +#include "MLGDeviceTypes.h" + +namespace mozilla { +namespace layers { + +class MLGDevice; +class RenderViewMLGPU; + +class ContainerLayerMLGPU final : public ContainerLayer, public LayerMLGPU { + public: + explicit ContainerLayerMLGPU(LayerManagerMLGPU* aManager); + virtual ~ContainerLayerMLGPU(); + + MOZ_LAYER_DECL_NAME("ContainerLayerMLGPU", TYPE_CONTAINER) + + HostLayer* AsHostLayer() override { return this; } + ContainerLayerMLGPU* AsContainerLayerMLGPU() override { return this; } + Layer* GetLayer() override { return this; } + + void ComputeEffectiveTransforms( + const gfx::Matrix4x4& aTransformToSurface) override { + DefaultComputeEffectiveTransforms(aTransformToSurface); + } + void SetInvalidCompositeRect(const gfx::IntRect* aRect) override; + void ClearCachedResources() override; + + const LayerIntRegion& GetShadowVisibleRegion() override; + + RefPtr<MLGRenderTarget> UpdateRenderTarget(MLGDevice* aDevice, + MLGRenderTargetFlags aFlags); + + MLGRenderTarget* GetRenderTarget() const { return mRenderTarget; } + gfx::IntPoint GetTargetOffset() const { return mTargetOffset; } + gfx::IntSize GetTargetSize() const { return mTargetSize; } + const gfx::IntRect& GetInvalidRect() const { return mInvalidRect; } + void ClearInvalidRect() { mInvalidRect.SetEmpty(); } + bool IsContentOpaque() override; + bool NeedsSurfaceCopy() const { return mSurfaceCopyNeeded; } + + RenderViewMLGPU* GetRenderView() const { return mView; } + void SetRenderView(RenderViewMLGPU* aView) { + MOZ_ASSERT(!mView); + mView = aView; + } + + void ComputeIntermediateSurfaceBounds(); + + // Similar to ContainerLayerComposite, we need to include the pres shell + // resolution, if there is one, in the layer's post-scale. + float GetPostXScale() const override { + return mSimpleAttrs.GetPostXScale() * mPresShellResolution; + } + float GetPostYScale() const override { + return mSimpleAttrs.GetPostYScale() * mPresShellResolution; + } + + protected: + bool OnPrepareToRender(FrameBuilder* aBuilder) override; + void OnLayerManagerChange(LayerManagerMLGPU* aManager) override; + + private: + static Maybe<gfx::IntRect> FindVisibleBounds( + Layer* aLayer, const Maybe<RenderTargetIntRect>& aClip); + + RefPtr<MLGRenderTarget> mRenderTarget; + + // We cache these since occlusion culling can change the visible region. 
+ gfx::IntPoint mTargetOffset; + gfx::IntSize mTargetSize; + + // The region of the container that needs to be recomposited if visible. We + // store this as a rectangle instead of an nsIntRegion for efficiency. This + // is in layer coordinates. + gfx::IntRect mInvalidRect; + bool mInvalidateEntireSurface; + bool mSurfaceCopyNeeded; + + // This is only valid for intermediate surfaces while an instance of + // FrameBuilder is live. + RenderViewMLGPU* mView; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_ContainerLayerMLGPU_h diff --git a/gfx/layers/mlgpu/FrameBuilder.cpp b/gfx/layers/mlgpu/FrameBuilder.cpp new file mode 100644 index 0000000000..adb4e8425c --- /dev/null +++ b/gfx/layers/mlgpu/FrameBuilder.cpp @@ -0,0 +1,412 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "FrameBuilder.h" +#include "ContainerLayerMLGPU.h" +#include "GeckoProfiler.h" // for profiler_* +#include "LayerMLGPU.h" +#include "LayerManagerMLGPU.h" +#include "MaskOperation.h" +#include "MLGDevice.h" // for MLGSwapChain +#include "RenderPassMLGPU.h" +#include "RenderViewMLGPU.h" +#include "mozilla/gfx/Logging.h" +#include "mozilla/gfx/Polygon.h" +#include "mozilla/layers/BSPTree.h" +#include "mozilla/layers/LayersHelpers.h" + +namespace mozilla { +namespace layers { + +using namespace mlg; + +FrameBuilder::FrameBuilder(LayerManagerMLGPU* aManager, + MLGSwapChain* aSwapChain) + : mManager(aManager), + mDevice(aManager->GetDevice()), + mSwapChain(aSwapChain) { + // test_bug1124898.html has a root ColorLayer, so we don't assume the root is + // a container. + mRoot = mManager->GetRoot()->AsHostLayer()->AsLayerMLGPU(); +} + +FrameBuilder::~FrameBuilder() = default; + +bool FrameBuilder::Build() { + AUTO_PROFILER_LABEL("FrameBuilder::Build", GRAPHICS); + + // AcquireBackBuffer can fail, so we check the result here. + RefPtr<MLGRenderTarget> target = mSwapChain->AcquireBackBuffer(); + if (!target) { + return false; + } + + // This updates the frame sequence number, so layers can quickly check if + // they've already been prepared. + LayerMLGPU::BeginFrame(); + + // Note: we don't clip draw calls to the invalid region per se, but instead + // the region bounds. Clipping all draw calls would incur a significant + // CPU cost on large layer trees, and would greatly complicate how draw + // rects are added in RenderPassMLGPU, since we would need to break + // each call into additional items based on the intersection with the + // invalid region. + // + // Instead we scissor to the invalid region bounds. As a result, all items + // affecting the invalid bounds are redrawn, even if not all are in the + // precise region. + const nsIntRegion& region = mSwapChain->GetBackBufferInvalidRegion(); + + mWidgetRenderView = new RenderViewMLGPU(this, target, region); + + // Traverse the layer tree and compute visible region for intermediate + // surfaces + if (ContainerLayerMLGPU* root = + mRoot->AsLayerMLGPU()->AsContainerLayerMLGPU()) { + root->ComputeIntermediateSurfaceBounds(); + } + + // Traverse the layer tree and assign each layer to tiles. 
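// [Editorial note - illustrative example, not part of this patch.] Because
// draw calls are scissored to the *bounds* of the invalid region (see the
// comment above), a region made of two small rects such as {10,10,20x20} and
// {400,300,20x20} behaves like its bounding rect {10,10,410x310}: every item
// touching that bounds is redrawn, including items between the two dirty
// rects that did not actually change.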
+ { + Maybe<gfx::Polygon> geometry; + RenderTargetIntRect clip(0, 0, target->GetSize().width, + target->GetSize().height); + + AssignLayer(mRoot->GetLayer(), mWidgetRenderView, clip, + std::move(geometry)); + } + + // Build the default mask buffer. + { + MaskInformation defaultMaskInfo(1.0f, false); + if (!mDevice->GetSharedPSBuffer()->Allocate(&mDefaultMaskInfo, + defaultMaskInfo)) { + return false; + } + } + + // Build render passes and buffer information for each pass. + mWidgetRenderView->FinishBuilding(); + mWidgetRenderView->Prepare(); + + // Prepare masks that need to be combined. + for (const auto& pair : mCombinedTextureMasks) { + pair.second->PrepareForRendering(); + } + + FinishCurrentLayerBuffer(); + FinishCurrentMaskRectBuffer(); + return true; +} + +void FrameBuilder::Render() { + AUTO_PROFILER_LABEL("FrameBuilder::Render", GRAPHICS); + + // Render combined masks into single mask textures. + for (const auto& pair : mCombinedTextureMasks) { + pair.second->Render(); + } + + // Render to all targets, front-to-back. + mWidgetRenderView->Render(); +} + +void FrameBuilder::AssignLayer(Layer* aLayer, RenderViewMLGPU* aView, + const RenderTargetIntRect& aClipRect, + Maybe<gfx::Polygon>&& aGeometry) { + LayerMLGPU* layer = aLayer->AsHostLayer()->AsLayerMLGPU(); + + if (ContainerLayer* container = aLayer->AsContainerLayer()) { + // This returns false if we don't need to (or can't) process the layer any + // further. This always returns false for non-leaf ContainerLayers. + if (!ProcessContainerLayer(container, aView, aClipRect, aGeometry)) { + return; + } + } else { + // Set the precomputed clip and any textures/resources that are needed. + if (!layer->PrepareToRender(this, aClipRect)) { + return; + } + } + + // If we are dealing with a nested 3D context, we might need to transform + // the geometry back to the coordinate space of the current layer. + if (aGeometry) { + TransformLayerGeometry(aLayer, aGeometry); + } + + // Finally, assign the layer to a rendering batch in the current render + // target. + layer->AssignToView(this, aView, std::move(aGeometry)); +} + +bool FrameBuilder::ProcessContainerLayer(ContainerLayer* aContainer, + RenderViewMLGPU* aView, + const RenderTargetIntRect& aClipRect, + Maybe<gfx::Polygon>& aGeometry) { + LayerMLGPU* layer = aContainer->AsHostLayer()->AsLayerMLGPU(); + + // Diagnostic information for bug 1387467. + if (!layer) { + gfxDevCrash(gfx::LogReason::InvalidLayerType) + << "Layer type is invalid: " << aContainer->Name(); + return false; + } + + // We don't want to traverse containers twice, so we only traverse them if + // they haven't been prepared yet. + bool isFirstVisit = !layer->IsPrepared(); + if (isFirstVisit && !layer->PrepareToRender(this, aClipRect)) { + return false; + } + + if (!aContainer->UseIntermediateSurface()) { + // In case the layer previously required an intermediate surface, we + // clear any intermediate render targets here. + layer->ClearCachedResources(); + + // This is a pass-through container, so we just process children and + // instruct AssignLayer to early-return. + ProcessChildList(aContainer, aView, aClipRect, aGeometry); + return false; + } + + // If this is the first visit of the container this frame, and the + // container has an unpainted area, we traverse the container. Note that + // RefLayers do not have intermediate surfaces so this is guaranteed + // to be a full-fledged ContainerLayerMLGPU. 
+ ContainerLayerMLGPU* viewContainer = layer->AsContainerLayerMLGPU(); + if (!viewContainer) { + gfxDevCrash(gfx::LogReason::InvalidLayerType) + << "Container layer type is invalid: " << aContainer->Name(); + return false; + } + + if (isFirstVisit && !viewContainer->GetInvalidRect().IsEmpty()) { + // The RenderView constructor automatically attaches itself to the parent. + RefPtr<RenderViewMLGPU> view = + new RenderViewMLGPU(this, viewContainer, aView); + ProcessChildList(aContainer, view, aClipRect, Nothing()); + view->FinishBuilding(); + } + return true; +} + +void FrameBuilder::ProcessChildList( + ContainerLayer* aContainer, RenderViewMLGPU* aView, + const RenderTargetIntRect& aParentClipRect, + const Maybe<gfx::Polygon>& aParentGeometry) { + nsTArray<LayerPolygon> polygons = aContainer->SortChildrenBy3DZOrder( + ContainerLayer::SortMode::WITH_GEOMETRY); + + // Visit layers in front-to-back order. + for (auto iter = polygons.rbegin(); iter != polygons.rend(); iter++) { + LayerPolygon& entry = *iter; + Layer* child = entry.layer; + if (child->IsBackfaceHidden() || !child->IsVisible()) { + continue; + } + + RenderTargetIntRect clip = child->CalculateScissorRect(aParentClipRect); + if (clip.IsEmpty()) { + continue; + } + + Maybe<gfx::Polygon> geometry; + if (aParentGeometry && entry.geometry) { + // Both parent and child are split. + geometry = Some(aParentGeometry->ClipPolygon(*entry.geometry)); + } else if (aParentGeometry) { + geometry = aParentGeometry; + } else if (entry.geometry) { + geometry = std::move(entry.geometry); + } + + AssignLayer(child, aView, clip, std::move(geometry)); + } +} + +bool FrameBuilder::AddLayerToConstantBuffer(ItemInfo& aItem) { + LayerMLGPU* layer = aItem.layer; + + // If this layer could appear multiple times, cache it. + if (aItem.geometry) { + if (mLayerBufferMap.Get(layer, &aItem.layerIndex)) { + return true; + } + } + + LayerConstants* info = AllocateLayerInfo(aItem); + if (!info) { + return false; + } + + // Note we do not use GetEffectiveTransformForBuffer, since we calculate + // the correct scaling when we build texture coordinates. + Layer* baseLayer = layer->GetLayer(); + const gfx::Matrix4x4& transform = baseLayer->GetEffectiveTransform(); + + memcpy(&info->transform, &transform._11, 64); + info->clipRect = gfx::Rect(layer->GetComputedClipRect().ToUnknownRect()); + info->maskIndex = 0; + if (MaskOperation* op = layer->GetMask()) { + // Note: we use 0 as an invalid index, and so indices are offset by 1. + gfx::Rect rect = op->ComputeMaskRect(baseLayer); + AddMaskRect(rect, &info->maskIndex); + } + + if (aItem.geometry) { + mLayerBufferMap.Put(layer, aItem.layerIndex); + } + return true; +} + +MaskOperation* FrameBuilder::AddMaskOperation(LayerMLGPU* aLayer) { + Layer* layer = aLayer->GetLayer(); + MOZ_ASSERT(layer->HasMaskLayers()); + + // Multiple masks are combined into a single mask. + if ((layer->GetMaskLayer() && layer->GetAncestorMaskLayerCount()) || + layer->GetAncestorMaskLayerCount() > 1) { + // Since each mask can be moved independently of the other, we must create + // a separate combined mask for every new positioning we encounter. 
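// [Editorial note, not part of this patch.] The combined-mask cache below is
// keyed on the full ordered MaskTextureList, so two layers sharing the same
// mask layer chain (same textures at the same screen rects) reuse one
// MaskCombineOperation, while the same textures at a different position
// produce a distinct key and a new combined mask texture.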
+ MaskTextureList textures; + if (Layer* maskLayer = layer->GetMaskLayer()) { + AppendToMaskTextureList(textures, maskLayer); + } + for (size_t i = 0; i < layer->GetAncestorMaskLayerCount(); i++) { + AppendToMaskTextureList(textures, layer->GetAncestorMaskLayerAt(i)); + } + + auto iter = mCombinedTextureMasks.find(textures); + if (iter != mCombinedTextureMasks.end()) { + return iter->second; + } + + RefPtr<MaskCombineOperation> op = new MaskCombineOperation(this); + op->Init(textures); + + mCombinedTextureMasks[textures] = op; + return op; + } + + Layer* maskLayer = layer->GetMaskLayer() ? layer->GetMaskLayer() + : layer->GetAncestorMaskLayerAt(0); + RefPtr<TextureSource> texture = GetMaskLayerTexture(maskLayer); + if (!texture) { + return nullptr; + } + + RefPtr<MaskOperation> op; + mSingleTextureMasks.Get(texture, getter_AddRefs(op)); + if (op) { + return op; + } + + RefPtr<MLGTexture> wrapped = mDevice->CreateTexture(texture); + + op = new MaskOperation(this, wrapped); + mSingleTextureMasks.Put(texture, RefPtr{op}); + return op; +} + +void FrameBuilder::RetainTemporaryLayer(LayerMLGPU* aLayer) { + // This should only be used with temporary layers. Temporary layers do not + // have parents. + MOZ_ASSERT(!aLayer->GetLayer()->GetParent()); + mTemporaryLayers.push_back(aLayer->GetLayer()); +} + +MLGRenderTarget* FrameBuilder::GetWidgetRT() { + return mWidgetRenderView->GetRenderTarget(); +} + +LayerConstants* FrameBuilder::AllocateLayerInfo(ItemInfo& aItem) { + if (((mCurrentLayerBuffer.Length() + 1) * sizeof(LayerConstants)) > + mDevice->GetMaxConstantBufferBindSize()) { + FinishCurrentLayerBuffer(); + mLayerBufferMap.Clear(); + mCurrentLayerBuffer.ClearAndRetainStorage(); + } + + LayerConstants* info = mCurrentLayerBuffer.AppendElement(mozilla::fallible); + if (!info) { + return nullptr; + } + + aItem.layerIndex = mCurrentLayerBuffer.Length() - 1; + return info; +} + +void FrameBuilder::FinishCurrentLayerBuffer() { + if (mCurrentLayerBuffer.IsEmpty()) { + return; + } + + // Note: we append the buffer even if we couldn't allocate one, since + // that keeps the indices sane. + ConstantBufferSection section; + mDevice->GetSharedVSBuffer()->Allocate( + §ion, mCurrentLayerBuffer.Elements(), mCurrentLayerBuffer.Length()); + mLayerBuffers.AppendElement(section); +} + +size_t FrameBuilder::CurrentLayerBufferIndex() const { + // The mask rect buffer list doesn't contain the buffer currently being + // built, so we don't subtract 1 here. + return mLayerBuffers.Length(); +} + +ConstantBufferSection FrameBuilder::GetLayerBufferByIndex(size_t aIndex) const { + if (aIndex >= mLayerBuffers.Length()) { + return ConstantBufferSection(); + } + return mLayerBuffers[aIndex]; +} + +bool FrameBuilder::AddMaskRect(const gfx::Rect& aRect, uint32_t* aOutIndex) { + if (((mCurrentMaskRectList.Length() + 1) * sizeof(gfx::Rect)) > + mDevice->GetMaxConstantBufferBindSize()) { + FinishCurrentMaskRectBuffer(); + mCurrentMaskRectList.ClearAndRetainStorage(); + } + + mCurrentMaskRectList.AppendElement(aRect); + + // Mask indices start at 1 so the shader can use 0 as a no-mask indicator. + *aOutIndex = mCurrentMaskRectList.Length(); + return true; +} + +void FrameBuilder::FinishCurrentMaskRectBuffer() { + if (mCurrentMaskRectList.IsEmpty()) { + return; + } + + // Note: we append the buffer even if we couldn't allocate one, since + // that keeps the indices sane. 
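// [Editorial note - illustrative arithmetic, not part of this patch; the
// element sizes are assumptions.] The chunking in AllocateLayerInfo() and
// AddMaskRect() above fills a constant buffer section until the next element
// would exceed GetMaxConstantBufferBindSize(). On Direct3D 11 that limit is
// 64KB (65536 bytes), so:
//   - a mask-rect section holds up to 65536 / sizeof(gfx::Rect)
//     == 65536 / 16 == 4096 rects;
//   - if LayerConstants is, say, 96 bytes (4x4 transform + clip rect + mask
//     index + padding), a layer section holds roughly 65536 / 96 == 682
//     entries before a new section is started.
// Mask indices handed out by AddMaskRect() are 1-based so that 0 can mean
// "no mask" in the shader.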
+ ConstantBufferSection section; + mDevice->GetSharedVSBuffer()->Allocate( + §ion, mCurrentMaskRectList.Elements(), mCurrentMaskRectList.Length()); + mMaskRectBuffers.AppendElement(section); +} + +size_t FrameBuilder::CurrentMaskRectBufferIndex() const { + // The mask rect buffer list doesn't contain the buffer currently being + // built, so we don't subtract 1 here. + return mMaskRectBuffers.Length(); +} + +ConstantBufferSection FrameBuilder::GetMaskRectBufferByIndex( + size_t aIndex) const { + if (aIndex >= mMaskRectBuffers.Length()) { + return ConstantBufferSection(); + } + return mMaskRectBuffers[aIndex]; +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/FrameBuilder.h b/gfx/layers/mlgpu/FrameBuilder.h new file mode 100644 index 0000000000..5cf5714767 --- /dev/null +++ b/gfx/layers/mlgpu/FrameBuilder.h @@ -0,0 +1,126 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_FrameBuilder_h +#define mozilla_gfx_layers_mlgpu_FrameBuilder_h + +#include "mozilla/RefPtr.h" +#include "mozilla/gfx/Point.h" +#include "mozilla/gfx/Types.h" +#include "MaskOperation.h" +#include "MLGDevice.h" +#include "nsDataHashtable.h" +#include "nsRefPtrHashtable.h" +#include "ShaderDefinitionsMLGPU.h" +#include "SharedBufferMLGPU.h" +#include "Units.h" +#include <map> +#include <vector> + +namespace mozilla { +namespace layers { + +class ContainerLayer; +class ContainerLayerMLGPU; +class Layer; +class LayerMLGPU; +class LayerManagerMLGPU; +class MLGDevice; +class MLGRenderTarget; +class MLGSwapChain; +class RenderViewMLGPU; +struct ItemInfo; + +class FrameBuilder final { + public: + FrameBuilder(LayerManagerMLGPU* aManager, MLGSwapChain* aSwapChain); + ~FrameBuilder(); + + bool Build(); + void Render(); + + bool AddLayerToConstantBuffer(ItemInfo& aItem); + + LayerManagerMLGPU* GetManager() const { return mManager; } + MLGDevice* GetDevice() const { return mDevice; } + const ConstantBufferSection& GetDefaultMaskInfo() const { + return mDefaultMaskInfo; + } + + // Called during tile construction. Finds or adds a mask layer chain to the + // cache, that will be flattened as a dependency to rendering batches. + MaskOperation* AddMaskOperation(LayerMLGPU* aLayer); + + // Note: These should only be called during batch construction. + size_t CurrentLayerBufferIndex() const; + size_t CurrentMaskRectBufferIndex() const; + + // These are called during rendering, and may return null if a buffer + // couldn't be allocated. + ConstantBufferSection GetLayerBufferByIndex(size_t aIndex) const; + ConstantBufferSection GetMaskRectBufferByIndex(size_t aIndex) const; + + // Hold a layer alive until the frame ends. 
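// [Editorial note - usage sketch, not part of this patch.] A render pass
// pairs these calls across the two phases: while batching it records
//   size_t index = builder->CurrentLayerBufferIndex();
// and once Build() has called FinishCurrentLayerBuffer(), that same index
// resolves the finished section at render time via
//   ConstantBufferSection cb = builder->GetLayerBufferByIndex(index);
// (the section at that index may be invalid if the shared-buffer allocation
// failed, but the index itself stays stable).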
+ void RetainTemporaryLayer(LayerMLGPU* aLayer); + + MLGRenderTarget* GetWidgetRT(); + + private: + void AssignLayer(Layer* aLayer, RenderViewMLGPU* aView, + const RenderTargetIntRect& aClipRect, + Maybe<gfx::Polygon>&& aGeometry); + + void ProcessChildList(ContainerLayer* aContainer, RenderViewMLGPU* aView, + const RenderTargetIntRect& aParentClipRect, + const Maybe<gfx::Polygon>& aParentGeometry); + + mlg::LayerConstants* AllocateLayerInfo(ItemInfo& aItem); + bool AddMaskRect(const gfx::Rect& aRect, uint32_t* aOutIndex); + void FinishCurrentLayerBuffer(); + void FinishCurrentMaskRectBuffer(); + + // Returns true to continue, false to stop - false does not indicate + // failure. + bool ProcessContainerLayer(ContainerLayer* aLayer, RenderViewMLGPU* aView, + const RenderTargetIntRect& aClipRect, + Maybe<gfx::Polygon>& aGeometry); + + private: + RefPtr<LayerManagerMLGPU> mManager; + RefPtr<MLGDevice> mDevice; + RefPtr<MLGSwapChain> mSwapChain; + RefPtr<RenderViewMLGPU> mWidgetRenderView; + LayerMLGPU* mRoot; + + // Each time we consume a layer in a tile, we make sure a constant buffer + // exists that contains information about the layer. The mapping is valid + // for the most recent buffer, and once the buffer fills, we begin a new + // one and clear the map. + nsTArray<ConstantBufferSection> mLayerBuffers; + nsTArray<mlg::LayerConstants> mCurrentLayerBuffer; + nsDataHashtable<nsPtrHashKey<LayerMLGPU>, uint32_t> mLayerBufferMap; + + // We keep mask rects in a separate buffer since they're rare. + nsTArray<ConstantBufferSection> mMaskRectBuffers; + nsTArray<gfx::Rect> mCurrentMaskRectList; + + // For values that *can* change every render pass, but almost certainly do + // not, we pre-fill and cache some buffers. + ConstantBufferSection mDefaultMaskInfo; + + // Cache for MaskOperations. + nsRefPtrHashtable<nsRefPtrHashKey<TextureSource>, MaskOperation> + mSingleTextureMasks; + std::map<MaskTextureList, RefPtr<MaskCombineOperation>> mCombinedTextureMasks; + + // This list of temporary layers is wiped out when the frame is completed. + std::vector<RefPtr<Layer>> mTemporaryLayers; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_FrameBuilder_h diff --git a/gfx/layers/mlgpu/ImageLayerMLGPU.cpp b/gfx/layers/mlgpu/ImageLayerMLGPU.cpp new file mode 100644 index 0000000000..9d236bf91f --- /dev/null +++ b/gfx/layers/mlgpu/ImageLayerMLGPU.cpp @@ -0,0 +1,108 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ImageLayerMLGPU.h" +#include "LayerManagerMLGPU.h" + +namespace mozilla { + +using namespace gfx; + +namespace layers { + +ImageLayerMLGPU::ImageLayerMLGPU(LayerManagerMLGPU* aManager) + : ImageLayer(aManager, static_cast<HostLayer*>(this)), + TexturedLayerMLGPU(aManager) {} + +ImageLayerMLGPU::~ImageLayerMLGPU() { CleanupResources(); } + +void ImageLayerMLGPU::ComputeEffectiveTransforms( + const gfx::Matrix4x4& aTransformToSurface) { + Matrix4x4 local = GetLocalTransform(); + + // Snap image edges to pixel boundaries. + gfxRect sourceRect(0, 0, 0, 0); + if (mHost && mHost->IsAttached()) { + IntSize size = mHost->GetImageSize(); + sourceRect.SizeTo(size.width, size.height); + } + + // Snap our local transform first, and snap the inherited transform as well. 
+ // This makes our snapping equivalent to what would happen if our content + // was drawn into a PaintedLayer (gfxContext would snap using the local + // transform, then we'd snap again when compositing the PaintedLayer). + mEffectiveTransform = SnapTransform(local, sourceRect, nullptr) * + SnapTransformTranslation(aTransformToSurface, nullptr); + mEffectiveTransformForBuffer = mEffectiveTransform; + + if (mScaleMode == ScaleMode::STRETCH && mScaleToSize.width != 0.0 && + mScaleToSize.height != 0.0) { + Size scale(sourceRect.Width() / mScaleToSize.width, + sourceRect.Height() / mScaleToSize.height); + mScale = Some(scale); + } + + ComputeEffectiveTransformForMaskLayers(aTransformToSurface); +} + +gfx::SamplingFilter ImageLayerMLGPU::GetSamplingFilter() { + return ImageLayer::GetSamplingFilter(); +} + +bool ImageLayerMLGPU::IsContentOpaque() { + if (mPictureRect.Width() == 0 || mPictureRect.Height() == 0) { + return false; + } + if (mScaleMode == ScaleMode::STRETCH) { + return gfx::IsOpaque(mHost->CurrentTextureHost()->GetFormat()); + } + return false; +} + +void ImageLayerMLGPU::SetRenderRegion(LayerIntRegion&& aRegion) { + switch (mScaleMode) { + case ScaleMode::STRETCH: + // See bug 1264142. + aRegion.AndWith( + LayerIntRect(0, 0, mScaleToSize.width, mScaleToSize.height)); + break; + default: + // Clamp the visible region to the texture size. (see bug 1396507) + MOZ_ASSERT(mScaleMode == ScaleMode::SCALE_NONE); + aRegion.AndWith( + LayerIntRect(0, 0, mPictureRect.Width(), mPictureRect.Height())); + break; + } + LayerMLGPU::SetRenderRegion(std::move(aRegion)); +} + +void ImageLayerMLGPU::CleanupResources() { + if (mHost) { + mHost->CleanupResources(); + mHost->Detach(this); + } + mTexture = nullptr; + mBigImageTexture = nullptr; + mHost = nullptr; +} + +void ImageLayerMLGPU::PrintInfo(std::stringstream& aStream, + const char* aPrefix) { + ImageLayer::PrintInfo(aStream, aPrefix); + if (mHost && mHost->IsAttached()) { + aStream << "\n"; + nsAutoCString pfx(aPrefix); + pfx += " "; + mHost->PrintInfo(aStream, pfx.get()); + } +} + +void ImageLayerMLGPU::Disconnect() { CleanupResources(); } + +void ImageLayerMLGPU::ClearCachedResources() { CleanupResources(); } + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/ImageLayerMLGPU.h b/gfx/layers/mlgpu/ImageLayerMLGPU.h new file mode 100644 index 0000000000..33d2e4f3e9 --- /dev/null +++ b/gfx/layers/mlgpu/ImageLayerMLGPU.h @@ -0,0 +1,51 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MOZILLA_GFX_IMAGELAYERMLGPU_H +#define MOZILLA_GFX_IMAGELAYERMLGPU_H + +#include "LayerManagerMLGPU.h" +#include "TexturedLayerMLGPU.h" +#include "ImageLayers.h" +#include "mozilla/layers/ImageHost.h" + +namespace mozilla { +namespace layers { + +class ImageLayerMLGPU final : public ImageLayer, public TexturedLayerMLGPU { + public: + explicit ImageLayerMLGPU(LayerManagerMLGPU* aManager); + + Layer* GetLayer() override { return this; } + HostLayer* AsHostLayer() override { return this; } + ImageLayerMLGPU* AsImageLayerMLGPU() override { return this; } + + void ComputeEffectiveTransforms( + const gfx::Matrix4x4& aTransformToSurface) override; + void SetRenderRegion(LayerIntRegion&& aRegion) override; + gfx::SamplingFilter GetSamplingFilter() override; + void ClearCachedResources() override; + bool IsContentOpaque() override; + void Disconnect() override; + + Maybe<gfx::Size> GetPictureScale() const override { return mScale; } + + MOZ_LAYER_DECL_NAME("ImageLayerMLGPU", TYPE_IMAGE) + + protected: + virtual ~ImageLayerMLGPU(); + + void PrintInfo(std::stringstream& aStream, const char* aPrefix) override; + void CleanupResources(); + + private: + Maybe<gfx::Size> mScale; +}; + +} // namespace layers +} // namespace mozilla + +#endif diff --git a/gfx/layers/mlgpu/LayerMLGPU.cpp b/gfx/layers/mlgpu/LayerMLGPU.cpp new file mode 100644 index 0000000000..714214e83b --- /dev/null +++ b/gfx/layers/mlgpu/LayerMLGPU.cpp @@ -0,0 +1,141 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "LayerManagerMLGPU.h" +#include "RenderPassMLGPU.h" +#include "RenderViewMLGPU.h" +#include "FrameBuilder.h" +#include "mozilla/layers/ImageHost.h" +#include "mozilla/layers/LayerManagerComposite.h" + +namespace mozilla { +namespace layers { + +using namespace gfx; + +uint64_t LayerMLGPU::sFrameKey = 0; + +LayerMLGPU::~LayerMLGPU() = default; + +LayerMLGPU::LayerMLGPU(LayerManagerMLGPU* aManager) + : HostLayer(aManager), + mFrameKey(0), + mComputedOpacity(0.0), + mPrepared(false) {} + +/* static */ +void LayerMLGPU::BeginFrame() { sFrameKey++; } + +LayerManagerMLGPU* LayerMLGPU::GetManager() { + return static_cast<LayerManagerMLGPU*>(mCompositorManager); +} + +bool LayerMLGPU::PrepareToRender(FrameBuilder* aBuilder, + const RenderTargetIntRect& aClipRect) { + if (mFrameKey == sFrameKey) { + return mPrepared; + } + mFrameKey = sFrameKey; + mPrepared = false; + + Layer* layer = GetLayer(); + + // Only container layers may have mixed blend modes. + MOZ_ASSERT_IF(layer->GetMixBlendMode() != CompositionOp::OP_OVER, + layer->GetType() == Layer::TYPE_CONTAINER); + + mComputedClipRect = aClipRect; + mComputedOpacity = layer->GetEffectiveOpacity(); + + if (layer->HasMaskLayers()) { + mMask = aBuilder->AddMaskOperation(this); + // If the mask has no texture, the pixel shader can't read any non-zero + // values for the mask, so we can consider the whole thing invisible. 
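// [Editorial note, not part of this patch.] PrepareToRender() is memoized on
// the global sFrameKey: BeginFrame() bumps the key once per composite, and
// the early-return at the top of this function means a layer visited a
// second time in the same frame (which can happen after 3D sorting splits
// geometry) just returns the cached mPrepared result instead of being
// prepared twice.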
+ if (mMask && mMask->IsEmpty()) { + mComputedOpacity = 0.0f; + } + } else { + mMask = nullptr; + } + + if (!OnPrepareToRender(aBuilder)) { + return false; + } + + mPrepared = true; + return true; +} + +void LayerMLGPU::AssignToView(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + Maybe<gfx::Polygon>&& aGeometry) { + AddBoundsToView(aBuilder, aView, std::move(aGeometry)); +} + +void LayerMLGPU::AddBoundsToView(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + Maybe<gfx::Polygon>&& aGeometry) { + IntRect bounds = GetClippedBoundingBox(aView, aGeometry); + aView->AddItem(this, bounds, std::move(aGeometry)); +} + +IntRect LayerMLGPU::GetClippedBoundingBox( + RenderViewMLGPU* aView, const Maybe<gfx::Polygon>& aGeometry) { + MOZ_ASSERT(IsPrepared()); + + Layer* layer = GetLayer(); + const Matrix4x4& transform = layer->GetEffectiveTransform(); + + Rect rect = + aGeometry + ? aGeometry->BoundingBox() + : Rect(layer->GetLocalVisibleRegion().GetBounds().ToUnknownRect()); + rect = transform.TransformBounds(rect); + rect.MoveBy(-aView->GetTargetOffset()); + rect = rect.Intersect(Rect(mComputedClipRect.ToUnknownRect())); + + IntRect bounds; + rect.RoundOut(); + rect.ToIntRect(&bounds); + return bounds; +} + +void LayerMLGPU::MarkPrepared() { + mFrameKey = sFrameKey; + mPrepared = true; +} + +bool LayerMLGPU::IsContentOpaque() { return GetLayer()->IsOpaque(); } + +void LayerMLGPU::SetRenderRegion(LayerIntRegion&& aRegion) { + mRenderRegion = std::move(aRegion); +} + +void LayerMLGPU::SetLayerManager(HostLayerManager* aManager) { + LayerManagerMLGPU* manager = aManager->AsLayerManagerMLGPU(); + MOZ_RELEASE_ASSERT(manager); + + HostLayer::SetLayerManager(aManager); + GetLayer()->SetManager(manager, this); + + if (CompositableHost* host = GetCompositableHost()) { + host->SetTextureSourceProvider(manager->GetTextureSourceProvider()); + } + + OnLayerManagerChange(manager); +} + +RefLayerMLGPU::RefLayerMLGPU(LayerManagerMLGPU* aManager) + : RefLayer(aManager, static_cast<HostLayer*>(this)), LayerMLGPU(aManager) {} + +RefLayerMLGPU::~RefLayerMLGPU() = default; + +ColorLayerMLGPU::ColorLayerMLGPU(LayerManagerMLGPU* aManager) + : ColorLayer(aManager, static_cast<HostLayer*>(this)), + LayerMLGPU(aManager) {} + +ColorLayerMLGPU::~ColorLayerMLGPU() = default; + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/LayerMLGPU.h b/gfx/layers/mlgpu/LayerMLGPU.h new file mode 100644 index 0000000000..d11106688f --- /dev/null +++ b/gfx/layers/mlgpu/LayerMLGPU.h @@ -0,0 +1,161 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_gfx_layers_mlgpu_LayerMLGPU_h +#define mozilla_gfx_layers_mlgpu_LayerMLGPU_h + +#include "Layers.h" +#include "mozilla/layers/LayerManagerComposite.h" + +namespace mozilla { +namespace layers { + +class CanvasLayerMLGPU; +class ColorLayerMLGPU; +class ContainerLayerMLGPU; +class FrameBuilder; +class ImageHost; +class ImageLayerMLGPU; +class LayerManagerMLGPU; +class MaskOperation; +class MLGRenderTarget; +class PaintedLayerMLGPU; +class RefLayerMLGPU; +class RenderViewMLGPU; +class TexturedLayerMLGPU; +class TextureSource; + +class LayerMLGPU : public HostLayer { + public: + LayerMLGPU* AsLayerMLGPU() override { return this; } + virtual PaintedLayerMLGPU* AsPaintedLayerMLGPU() { return nullptr; } + virtual ImageLayerMLGPU* AsImageLayerMLGPU() { return nullptr; } + virtual CanvasLayerMLGPU* AsCanvasLayerMLGPU() { return nullptr; } + virtual ContainerLayerMLGPU* AsContainerLayerMLGPU() { return nullptr; } + virtual RefLayerMLGPU* AsRefLayerMLGPU() { return nullptr; } + virtual ColorLayerMLGPU* AsColorLayerMLGPU() { return nullptr; } + virtual TexturedLayerMLGPU* AsTexturedLayerMLGPU() { return nullptr; } + + static void BeginFrame(); + + // Ask the layer to acquire any resources or per-frame information needed + // to render. If this returns false, the layer will be skipped entirely. + bool PrepareToRender(FrameBuilder* aBuilder, + const RenderTargetIntRect& aClipRect); + + Layer::LayerType GetType() { return GetLayer()->GetType(); } + const RenderTargetIntRect& GetComputedClipRect() const { + return mComputedClipRect; + } + MaskOperation* GetMask() const { return mMask; } + float GetComputedOpacity() const { return mComputedOpacity; } + + // Return the bounding box of this layer in render target space, clipped to + // the computed clip rect, and rounded out to an integer rect. + gfx::IntRect GetClippedBoundingBox(RenderViewMLGPU* aView, + const Maybe<gfx::Polygon>& aGeometry); + + // If this layer has already been prepared for the current frame, return + // true. This should only be used to guard against double-processing + // container layers after 3d-sorting. + bool IsPrepared() const { return mFrameKey == sFrameKey && mPrepared; } + + // Return true if the content in this layer is opaque (not factoring in + // blend modes or opacity), false otherwise. + virtual bool IsContentOpaque(); + + // Returns the region that this layer will draw pixels to. If the layer and + // its content are opaque, this is the layer's opaque region. + const LayerIntRegion& GetRenderRegion() const { return mRenderRegion; } + + // Some layers have visible regions that extend beyond what is actually drawn. + // When performing CPU-based occlusion culling we must clamp the visible + // region to the actual area. Note that if a layer is opaque, it must not + // expand its visible region such that it might include non-opaque pixels, as + // may be the case for PaintedLayers with a restricted visible region. + virtual void SetRenderRegion(LayerIntRegion&& aRegion); + + virtual void AssignToView(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + Maybe<gfx::Polygon>&& aGeometry); + + // Callback for when PrepareToRender has finished successfully. If this + // returns false, PrepareToRender will return false. 
+ virtual bool OnPrepareToRender(FrameBuilder* aBuilder) { return true; } + + virtual void ClearCachedResources() {} + CompositableHost* GetCompositableHost() override { return nullptr; } + + protected: + LayerMLGPU(LayerManagerMLGPU* aManager); + ~LayerMLGPU(); + LayerManagerMLGPU* GetManager(); + + void AddBoundsToView(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + Maybe<gfx::Polygon>&& aGeometry); + + void MarkPrepared(); + + // We don't want derivative layers overriding this directly - we provide a + // callback instead. + void SetLayerManager(HostLayerManager* aManager) override; + virtual void OnLayerManagerChange(LayerManagerMLGPU* aManager) {} + + private: + // This is a monotonic counter used to check whether a layer appears twice + // when 3d sorting. + static uint64_t sFrameKey; + + protected: + // These are set during PrepareToRender. + RenderTargetIntRect mComputedClipRect; + RefPtr<MaskOperation> mMask; + uint64_t mFrameKey; + float mComputedOpacity; + bool mPrepared; + LayerIntRegion mRenderRegion; +}; + +class RefLayerMLGPU final : public RefLayer, public LayerMLGPU { + public: + explicit RefLayerMLGPU(LayerManagerMLGPU* aManager); + virtual ~RefLayerMLGPU(); + + // Layer + HostLayer* AsHostLayer() override { return this; } + RefLayerMLGPU* AsRefLayerMLGPU() override { return this; } + Layer* GetLayer() override { return this; } + + // ContainerLayer + void ComputeEffectiveTransforms( + const gfx::Matrix4x4& aTransformToSurface) override { + DefaultComputeEffectiveTransforms(aTransformToSurface); + } + + const LayerIntRegion& GetShadowVisibleRegion() override; + + MOZ_LAYER_DECL_NAME("RefLayerMLGPU", TYPE_REF) +}; + +class ColorLayerMLGPU final : public ColorLayer, public LayerMLGPU { + public: + explicit ColorLayerMLGPU(LayerManagerMLGPU* aManager); + virtual ~ColorLayerMLGPU(); + + // LayerMLGPU + bool IsContentOpaque() override { return mColor.a >= 1.0f; } + + // Layer + HostLayer* AsHostLayer() override { return this; } + ColorLayerMLGPU* AsColorLayerMLGPU() override { return this; } + Layer* GetLayer() override { return this; } + + MOZ_LAYER_DECL_NAME("ColorLayerMLGPU", TYPE_COLOR) +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_LayerMLGPU_h diff --git a/gfx/layers/mlgpu/LayerManagerMLGPU.cpp b/gfx/layers/mlgpu/LayerManagerMLGPU.cpp new file mode 100644 index 0000000000..0379ffe785 --- /dev/null +++ b/gfx/layers/mlgpu/LayerManagerMLGPU.cpp @@ -0,0 +1,588 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "LayerManagerMLGPU.h" +#include "LayerTreeInvalidation.h" +#include "PaintedLayerMLGPU.h" +#include "ImageLayerMLGPU.h" +#include "CanvasLayerMLGPU.h" +#include "ContainerLayerMLGPU.h" +#include "GeckoProfiler.h" // for profiler_* +#include "gfxEnv.h" // for gfxEnv +#include "MLGDevice.h" +#include "RenderPassMLGPU.h" +#include "RenderViewMLGPU.h" +#include "ShaderDefinitionsMLGPU.h" +#include "SharedBufferMLGPU.h" +#include "UnitTransforms.h" +#include "TextureSourceProviderMLGPU.h" +#include "TreeTraversal.h" +#include "FrameBuilder.h" +#include "UtilityMLGPU.h" +#include "CompositionRecorder.h" +#include "mozilla/layers/Diagnostics.h" +#include "mozilla/layers/TextRenderer.h" +#include "mozilla/StaticPrefs_layers.h" +#include "mozilla/ToString.h" + +#ifdef XP_WIN +# include "mozilla/widget/WinCompositorWidget.h" +# include "mozilla/gfx/DeviceManagerDx.h" +#endif + +namespace mozilla { +namespace layers { + +using namespace gfx; + +static const int kDebugOverlayX = 2; +static const int kDebugOverlayY = 5; +static const int kDebugOverlayMaxWidth = 600; +static const int kDebugOverlayMaxHeight = 96; + +class RecordedFrameMLGPU : public RecordedFrame { + public: + RecordedFrameMLGPU(MLGDevice* aDevice, MLGTexture* aTexture, + const TimeStamp& aTimestamp) + : RecordedFrame(aTimestamp), mDevice(aDevice) { + mSoftTexture = + aDevice->CreateTexture(aTexture->GetSize(), SurfaceFormat::B8G8R8A8, + MLGUsage::Staging, MLGTextureFlags::None); + + aDevice->CopyTexture(mSoftTexture, IntPoint(), aTexture, + IntRect(IntPoint(), aTexture->GetSize())); + } + + ~RecordedFrameMLGPU() { + if (mIsMapped) { + mDevice->Unmap(mSoftTexture); + } + } + + virtual already_AddRefed<gfx::DataSourceSurface> GetSourceSurface() override { + if (mDataSurf) { + return RefPtr<DataSourceSurface>(mDataSurf).forget(); + } + MLGMappedResource map; + if (!mDevice->Map(mSoftTexture, MLGMapType::READ, &map)) { + return nullptr; + } + + mIsMapped = true; + mDataSurf = Factory::CreateWrappingDataSourceSurface( + map.mData, map.mStride, mSoftTexture->GetSize(), + SurfaceFormat::B8G8R8A8); + return RefPtr<DataSourceSurface>(mDataSurf).forget(); + } + + private: + RefPtr<MLGDevice> mDevice; + // Software texture in VRAM. 
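RecordedFrameMLGPU front-loads only a cheap GPU-to-GPU copy into a staging texture and defers the Map() until GetSourceSurface() is called, usually long after the copy has completed. A rough usage sketch; device, widgetTexture and recorder are placeholder locals, and the real capture site is in RenderLayers() further down:

// Capture side: a GPU-to-GPU copy into a staging texture, no CPU stall.
RefPtr<RecordedFrame> frame =
    new RecordedFrameMLGPU(device, widgetTexture, TimeStamp::Now());
recorder->RecordFrame(frame);

// Consume side, later: the staging texture is only mapped now, after the
// GPU has long since finished the copy, so the readback is cheap.
RefPtr<gfx::DataSourceSurface> pixels = frame->GetSourceSurface();

Unmapping is handled by the destructor, so the surface stays valid for the lifetime of the recorded frame.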
+ RefPtr<MLGTexture> mSoftTexture; + RefPtr<DataSourceSurface> mDataSurf; + bool mIsMapped = false; +}; + +LayerManagerMLGPU::LayerManagerMLGPU(widget::CompositorWidget* aWidget) + : mWidget(aWidget), + mDrawDiagnostics(false), + mUsingInvalidation(false), + mCurrentFrame(nullptr), + mDebugFrameNumber(0) { + if (!aWidget) { + return; + } + +#ifdef WIN32 + mDevice = DeviceManagerDx::Get()->GetMLGDevice(); +#endif + if (!mDevice || !mDevice->IsValid()) { + gfxWarning() << "Could not acquire an MLGDevice!"; + return; + } + + mSwapChain = mDevice->CreateSwapChainForWidget(aWidget); + if (!mSwapChain) { + gfxWarning() << "Could not acquire an MLGSwapChain!"; + return; + } + + mDiagnostics = MakeUnique<Diagnostics>(); + mTextRenderer = new TextRenderer(); +} + +LayerManagerMLGPU::~LayerManagerMLGPU() { + if (mTextureSourceProvider) { + mTextureSourceProvider->Destroy(); + } +} + +bool LayerManagerMLGPU::Initialize() { + if (!mDevice || !mSwapChain) { + return false; + } + + mTextureSourceProvider = new TextureSourceProviderMLGPU(this, mDevice); + return true; +} + +void LayerManagerMLGPU::Destroy() { + if (IsDestroyed()) { + return; + } + + LayerManager::Destroy(); + mProfilerScreenshotGrabber.Destroy(); + + if (mDevice && mDevice->IsValid()) { + mDevice->Flush(); + } + if (mSwapChain) { + mSwapChain->Destroy(); + mSwapChain = nullptr; + } + if (mTextureSourceProvider) { + mTextureSourceProvider->Destroy(); + mTextureSourceProvider = nullptr; + } + mWidget = nullptr; + mDevice = nullptr; +} + +void LayerManagerMLGPU::ForcePresent() { + if (!mDevice->IsValid()) { + return; + } + + IntSize windowSize = mWidget->GetClientSize().ToUnknownSize(); + if (mSwapChain->GetSize() != windowSize) { + return; + } + + mSwapChain->ForcePresent(); +} + +already_AddRefed<ContainerLayer> LayerManagerMLGPU::CreateContainerLayer() { + return MakeAndAddRef<ContainerLayerMLGPU>(this); +} + +already_AddRefed<ColorLayer> LayerManagerMLGPU::CreateColorLayer() { + return MakeAndAddRef<ColorLayerMLGPU>(this); +} + +already_AddRefed<RefLayer> LayerManagerMLGPU::CreateRefLayer() { + return MakeAndAddRef<RefLayerMLGPU>(this); +} + +already_AddRefed<PaintedLayer> LayerManagerMLGPU::CreatePaintedLayer() { + return MakeAndAddRef<PaintedLayerMLGPU>(this); +} + +already_AddRefed<ImageLayer> LayerManagerMLGPU::CreateImageLayer() { + return MakeAndAddRef<ImageLayerMLGPU>(this); +} + +already_AddRefed<CanvasLayer> LayerManagerMLGPU::CreateCanvasLayer() { + return MakeAndAddRef<CanvasLayerMLGPU>(this); +} + +TextureFactoryIdentifier LayerManagerMLGPU::GetTextureFactoryIdentifier() { + TextureFactoryIdentifier ident; + if (mDevice) { + ident = mDevice->GetTextureFactoryIdentifier(mWidget); + } + ident.mUsingAdvancedLayers = true; + return ident; +} + +LayersBackend LayerManagerMLGPU::GetBackendType() { + return mDevice ? mDevice->GetLayersBackend() : LayersBackend::LAYERS_NONE; +} + +void LayerManagerMLGPU::SetRoot(Layer* aLayer) { mRoot = aLayer; } + +bool LayerManagerMLGPU::BeginTransaction(const nsCString& aURL) { return true; } + +void LayerManagerMLGPU::BeginTransactionWithDrawTarget( + gfx::DrawTarget* aTarget, const gfx::IntRect& aRect) { + MOZ_ASSERT(!mTarget); + + mTarget = aTarget; + mTargetRect = aRect; +} + +// Helper class for making sure textures are unlocked. 
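Construction and Initialize() form a two-step bring-up: the constructor tries to acquire the MLGDevice and the swap chain, and Initialize() fails if either is missing. A rough caller-side sketch; what a caller does on failure (falling back to another compositor backend) is an assumption, not something this file decides:

RefPtr<LayerManagerMLGPU> CreateAdvancedLayerManager(
    widget::CompositorWidget* aWidget) {
  RefPtr<LayerManagerMLGPU> manager = new LayerManagerMLGPU(aWidget);
  // Initialize() returns false if the constructor could not acquire an
  // MLGDevice or an MLGSwapChain; the caller is then expected to fall back
  // to a different backend.
  if (!manager->Initialize()) {
    manager->Destroy();
    return nullptr;
  }
  return manager;
}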
+class MOZ_STACK_CLASS AutoUnlockAllTextures final { + public: + explicit AutoUnlockAllTextures(MLGDevice* aDevice) : mDevice(aDevice) {} + ~AutoUnlockAllTextures() { mDevice->UnlockAllTextures(); } + + private: + RefPtr<MLGDevice> mDevice; +}; + +void LayerManagerMLGPU::EndTransaction(const TimeStamp& aTimeStamp, + EndTransactionFlags aFlags) { + AUTO_PROFILER_LABEL("LayerManager::EndTransaction", GRAPHICS); + + TextureSourceProvider::AutoReadUnlockTextures unlock(mTextureSourceProvider); + + if (!mRoot || (aFlags & END_NO_IMMEDIATE_REDRAW) || !mWidget) { + return; + } + + if (!mDevice->IsValid()) { + // Waiting device reset handling. + return; + } + + mCompositionOpportunityId = mCompositionOpportunityId.Next(); + SetCompositionTime(aTimeStamp); + + mCompositionStartTime = TimeStamp::Now(); + + IntSize windowSize = mWidget->GetClientSize().ToUnknownSize(); + if (windowSize.IsEmpty()) { + return; + } + + // Resize the window if needed. +#ifdef XP_WIN + mWidget->AsWindows()->UpdateCompositorWndSizeIfNecessary(); +#endif + if (mSwapChain->GetSize() != windowSize) { + // Note: all references to the backbuffer must be cleared. + mDevice->SetRenderTarget(nullptr); + if (!mSwapChain->ResizeBuffers(windowSize)) { + gfxCriticalNote << "Could not resize the swapchain (" + << hexa(windowSize.width) << "," + << hexa(windowSize.height) << ")"; + return; + } + } + + // Don't draw the diagnostic overlay if we want to snapshot the output. + mDrawDiagnostics = StaticPrefs::layers_acceleration_draw_fps() && !mTarget; + mUsingInvalidation = StaticPrefs::layers_mlgpu_enable_invalidation(); + mDebugFrameNumber++; + + AL_LOG("--- Compositing frame %d ---\n", mDebugFrameNumber); + + // Compute transforms - and the changed area, if enabled. + mRoot->ComputeEffectiveTransforms(Matrix4x4()); + ComputeInvalidRegion(); + + // Build and execute draw commands, and present. + if (PreRender()) { + Composite(); + PostRender(); + } + + mTextureSourceProvider->FlushPendingNotifyNotUsed(); + + // Finish composition. + mLastCompositionEndTime = TimeStamp::Now(); +} + +void LayerManagerMLGPU::Composite() { + if (gfxEnv::SkipComposition()) { + return; + } + + AUTO_PROFILER_LABEL("LayerManagerMLGPU::Composite", GRAPHICS); + + // Don't composite if we're minimized/hidden, or if there is nothing to draw. + if (mWidget->IsHidden()) { + return; + } + + // Make sure the diagnostic area gets invalidated. We do this now, rather than + // earlier, so we don't accidentally cause extra composites. + Maybe<IntRect> diagnosticRect; + if (mDrawDiagnostics) { + diagnosticRect = + Some(IntRect(kDebugOverlayX, kDebugOverlayY, kDebugOverlayMaxWidth, + kDebugOverlayMaxHeight)); + } + + AL_LOG("Computed invalid region: %s\n", Stringify(mInvalidRegion).c_str()); + + // Now that we have the final invalid region, give it to the swap chain which + // will tell us if we still need to render. + if (!mSwapChain->ApplyNewInvalidRegion(std::move(mInvalidRegion), + diagnosticRect)) { + mProfilerScreenshotGrabber.NotifyEmptyFrame(); + + // Discard the current payloads. These payloads did not require a composite + // (they caused no changes to anything visible), so we don't want to measure + // their latency. 
+ mPayload.Clear(); + + return; + } + + AutoUnlockAllTextures autoUnlock(mDevice); + + mDevice->BeginFrame(); + + RenderLayers(); + + if (mDrawDiagnostics) { + DrawDebugOverlay(); + } + + if (mTarget) { + mSwapChain->CopyBackbuffer(mTarget, mTargetRect); + mTarget = nullptr; + mTargetRect = IntRect(); + } + mSwapChain->Present(); + + // We call this here to mimic the behavior in LayerManagerComposite, as to + // not change what Talos measures. That is, we do not record an empty frame + // as a frame, since we short-circuit at the top of this function. + RecordFrame(); + + mDevice->EndFrame(); + + // Free the old cloned property tree, then clone a new one. Note that we do + // this after compositing, since layer preparation actually mutates the layer + // tree (for example, ImageHost::mLastFrameID). We want the property tree to + // pick up these changes. Similarly, we are careful to not mutate the tree + // in any way that we *don't* want LayerProperties to catch, lest we cause + // extra invalidation. + // + // Note that the old Compositor performs occlusion culling directly on the + // shadow visible region, and does this *before* cloning layer tree + // properties. Advanced Layers keeps the occlusion region separate and + // performs invalidation against the clean layer tree. + mClonedLayerTreeProperties = nullptr; + mClonedLayerTreeProperties = LayerProperties::CloneFrom(mRoot); + + PayloadPresented(TimeStamp::Now()); + + mPayload.Clear(); +} + +void LayerManagerMLGPU::RenderLayers() { + AUTO_PROFILER_LABEL("LayerManagerMLGPU::RenderLayers", GRAPHICS); + + // Traverse the layer tree and assign each layer to a render target. + FrameBuilder builder(this, mSwapChain); + mCurrentFrame = &builder; + + if (!builder.Build()) { + return; + } + + if (mDrawDiagnostics) { + mDiagnostics->RecordPrepareTime( + (TimeStamp::Now() - mCompositionStartTime).ToMilliseconds()); + } + + // Make sure we acquire/release the sync object. + if (!mDevice->Synchronize()) { + // Catastrophic failure - probably a device reset. + return; + } + + TimeStamp start = TimeStamp::Now(); + + // Upload shared buffers. + mDevice->FinishSharedBufferUse(); + + // Prepare the pipeline. + if (mDrawDiagnostics) { + IntSize size = mSwapChain->GetBackBufferInvalidRegion().GetBounds().Size(); + uint32_t numPixels = size.width * size.height; + mDevice->StartDiagnostics(numPixels); + } + + // Execute all render passes. 
+ builder.Render(); + + mProfilerScreenshotGrabber.MaybeGrabScreenshot( + mDevice, builder.GetWidgetRT()->GetTexture()); + + if (mCompositionRecorder) { + bool hasContentPaint = false; + for (CompositionPayload& payload : mPayload) { + if (payload.mType == CompositionPayloadType::eContentPaint) { + hasContentPaint = true; + break; + } + } + + if (hasContentPaint) { + RefPtr<RecordedFrame> frame = new RecordedFrameMLGPU( + mDevice, builder.GetWidgetRT()->GetTexture(), TimeStamp::Now()); + mCompositionRecorder->RecordFrame(frame); + } + } + mCurrentFrame = nullptr; + + if (mDrawDiagnostics) { + mDiagnostics->RecordCompositeTime( + (TimeStamp::Now() - start).ToMilliseconds()); + mDevice->EndDiagnostics(); + } +} + +void LayerManagerMLGPU::DrawDebugOverlay() { + IntSize windowSize = mSwapChain->GetSize(); + + GPUStats stats; + mDevice->GetDiagnostics(&stats); + stats.mScreenPixels = windowSize.width * windowSize.height; + + std::string text = mDiagnostics->GetFrameOverlayString(stats); + RefPtr<TextureSource> texture = mTextRenderer->RenderText( + mTextureSourceProvider, text, 600, TextRenderer::FontType::FixedWidth); + if (!texture) { + return; + } + + if (mUsingInvalidation && + (texture->GetSize().width > kDebugOverlayMaxWidth || + texture->GetSize().height > kDebugOverlayMaxHeight)) { + gfxCriticalNote << "Diagnostic overlay exceeds invalidation area: %s" + << ToString(texture->GetSize()).c_str(); + } + + struct DebugRect { + Rect bounds; + Rect texCoords; + }; + + if (!mDiagnosticVertices) { + DebugRect rect; + rect.bounds = + Rect(Point(kDebugOverlayX, kDebugOverlayY), Size(texture->GetSize())); + rect.texCoords = Rect(0.0, 0.0, 1.0, 1.0); + + VertexStagingBuffer instances; + if (!instances.AppendItem(rect)) { + return; + } + + mDiagnosticVertices = mDevice->CreateBuffer( + MLGBufferType::Vertex, instances.NumItems() * instances.SizeOfItem(), + MLGUsage::Immutable, instances.GetBufferStart()); + if (!mDiagnosticVertices) { + return; + } + } + + // Note: we rely on the world transform being correctly left bound by the + // outermost render view. + mDevice->SetScissorRect(Nothing()); + mDevice->SetDepthTestMode(MLGDepthTestMode::Disabled); + mDevice->SetTopology(MLGPrimitiveTopology::UnitQuad); + mDevice->SetVertexShader(VertexShaderID::DiagnosticText); + mDevice->SetVertexBuffer(1, mDiagnosticVertices, sizeof(DebugRect)); + mDevice->SetPixelShader(PixelShaderID::DiagnosticText); + mDevice->SetBlendState(MLGBlendState::Over); + mDevice->SetPSTexture(0, texture); + mDevice->SetSamplerMode(0, SamplerMode::Point); + mDevice->DrawInstanced(4, 1, 0, 0); +} + +void LayerManagerMLGPU::ComputeInvalidRegion() { + // If invalidation is disabled, throw away cloned properties and redraw the + // whole target area. + if (!mUsingInvalidation) { + mInvalidRegion = mTarget ? mTargetRect : mRenderBounds; + mNextFrameInvalidRegion.SetEmpty(); + return; + } + + nsIntRegion changed; + if (mClonedLayerTreeProperties) { + if (!mClonedLayerTreeProperties->ComputeDifferences(mRoot, changed, + nullptr)) { + changed = mRenderBounds; + } + } else { + changed = mRenderBounds; + } + + // We compute the change region, but if we're painting to a target, we save + // it for the next frame instead. 
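Composite() and ComputeInvalidRegion() cooperate in a clone-then-diff cycle: after drawing, the layer tree is snapshotted, and the next frame diffs the (by then mutated) tree against that snapshot to build the invalid region. A condensed sketch of the cycle, reusing the member names from the code above; it is an outline, not compilable on its own:

// Frame N, after drawing (see Composite() above): snapshot the layer tree so
// the next frame can be diffed against it.
mClonedLayerTreeProperties = LayerProperties::CloneFrom(mRoot);

// Frame N+1, before drawing (see ComputeInvalidRegion() above): everything
// that changed since the snapshot joins the invalid region.
nsIntRegion changed;
if (!mClonedLayerTreeProperties ||
    !mClonedLayerTreeProperties->ComputeDifferences(mRoot, changed, nullptr)) {
  changed = mRenderBounds;  // no snapshot, or the diff failed: redraw it all
}
mInvalidRegion.OrWith(changed);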
+ if (mTarget) { + mInvalidRegion = mTargetRect; + mNextFrameInvalidRegion.OrWith(changed); + } else { + mInvalidRegion = std::move(mNextFrameInvalidRegion); + mInvalidRegion.OrWith(changed); + } +} + +void LayerManagerMLGPU::AddInvalidRegion(const nsIntRegion& aRegion) { + mNextFrameInvalidRegion.OrWith(aRegion); +} + +TextureSourceProvider* LayerManagerMLGPU::GetTextureSourceProvider() const { + return mTextureSourceProvider; +} + +bool LayerManagerMLGPU::IsCompositingToScreen() const { return !mTarget; } + +bool LayerManagerMLGPU::AreComponentAlphaLayersEnabled() { + return LayerManager::AreComponentAlphaLayersEnabled(); +} + +bool LayerManagerMLGPU::BlendingRequiresIntermediateSurface() { return true; } + +void LayerManagerMLGPU::EndTransaction(DrawPaintedLayerCallback aCallback, + void* aCallbackData, + EndTransactionFlags aFlags) { + MOZ_CRASH("GFX: Use EndTransaction(aTimeStamp)"); +} + +void LayerManagerMLGPU::ClearCachedResources(Layer* aSubtree) { + Layer* root = aSubtree ? aSubtree : mRoot.get(); + if (!root) { + return; + } + + ForEachNode<ForwardIterator>(root, [](Layer* aLayer) { + LayerMLGPU* layer = aLayer->AsHostLayer()->AsLayerMLGPU(); + if (!layer) { + return; + } + layer->ClearCachedResources(); + }); +} + +void LayerManagerMLGPU::NotifyShadowTreeTransaction() { + if (StaticPrefs::layers_acceleration_draw_fps()) { + mDiagnostics->AddTxnFrame(); + } +} + +void LayerManagerMLGPU::UpdateRenderBounds(const gfx::IntRect& aRect) { + mRenderBounds = aRect; +} + +bool LayerManagerMLGPU::PreRender() { + AUTO_PROFILER_LABEL("LayerManagerMLGPU::PreRender", GRAPHICS); + + widget::WidgetRenderingContext context; + if (!mWidget->PreRender(&context)) { + return false; + } + mWidgetContext = Some(context); + return true; +} + +void LayerManagerMLGPU::PostRender() { + mWidget->PostRender(mWidgetContext.ptr()); + mProfilerScreenshotGrabber.MaybeProcessQueue(); + mWidgetContext = Nothing(); +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/LayerManagerMLGPU.h b/gfx/layers/mlgpu/LayerManagerMLGPU.h new file mode 100644 index 0000000000..438779a337 --- /dev/null +++ b/gfx/layers/mlgpu/LayerManagerMLGPU.h @@ -0,0 +1,145 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef MOZILLA_GFX_LAYERMANAGERMLGPU_H +#define MOZILLA_GFX_LAYERMANAGERMLGPU_H + +#include <cstdint> // for uint32_t +#include "mozilla/AlreadyAddRefed.h" // for already_AddRefed +#include "mozilla/Assertions.h" // for AssertionConditionType, MOZ_ASSERT, MOZ_ASSERT_HELPER1 +#include "mozilla/Maybe.h" // for Maybe +#include "mozilla/RefPtr.h" // for RefPtr +#include "mozilla/TimeStamp.h" // for TimeStamp +#include "mozilla/UniquePtr.h" // for UniquePtr +#include "mozilla/gfx/Rect.h" // for IntRect +#include "mozilla/layers/CompositorTypes.h" // for TextureFactoryIdentifier +#include "mozilla/layers/LayerManager.h" // for LayerManager::EndTransactionFlags, LayerManager::DrawPaintedLayerCallback +#include "mozilla/layers/LayerManagerComposite.h" // for HostLayerManager +#include "mozilla/layers/LayersTypes.h" // for LayersBackend +#include "mozilla/layers/MLGPUScreenshotGrabber.h" // for MLGPUScreenshotGrabber +#include "nsRegion.h" // for nsIntRegion +#include "nsStringFwd.h" // for nsCString + +namespace mozilla { +namespace layers { + +class FrameBuilder; +class RenderPassMLGPU; +class SharedBufferMLGPU; +class TextRenderer; +class TextureSourceProviderMLGPU; +class MLGBuffer; +class MLGDevice; +class MLGSwapChain; +class MLGTileBuffer; +struct LayerProperties; + +class LayerManagerMLGPU final : public HostLayerManager { + public: + explicit LayerManagerMLGPU(widget::CompositorWidget* aWidget); + virtual ~LayerManagerMLGPU(); + + bool Initialize(); + void Destroy() override; + + // LayerManager methods + bool BeginTransaction(const nsCString& aURL) override; + void BeginTransactionWithDrawTarget(gfx::DrawTarget* aTarget, + const gfx::IntRect& aRect) override; + void SetRoot(Layer* aLayer) override; + already_AddRefed<PaintedLayer> CreatePaintedLayer() override; + already_AddRefed<ContainerLayer> CreateContainerLayer() override; + already_AddRefed<ImageLayer> CreateImageLayer() override; + already_AddRefed<ColorLayer> CreateColorLayer() override; + already_AddRefed<CanvasLayer> CreateCanvasLayer() override; + already_AddRefed<RefLayer> CreateRefLayer() override; + + bool AreComponentAlphaLayersEnabled() override; + bool BlendingRequiresIntermediateSurface() override; + + // HostLayerManager methods + void ForcePresent() override; + TextureFactoryIdentifier GetTextureFactoryIdentifier() override; + LayersBackend GetBackendType() override; + void AddInvalidRegion(const nsIntRegion& aRegion) override; + void EndTransaction(const TimeStamp& aTimeStamp, + EndTransactionFlags aFlags) override; + void EndTransaction(DrawPaintedLayerCallback aCallback, void* aCallbackData, + EndTransactionFlags aFlags) override; + Compositor* GetCompositor() const override { return nullptr; } + bool IsCompositingToScreen() const override; + TextureSourceProvider* GetTextureSourceProvider() const override; + void ClearCachedResources(Layer* aSubtree = nullptr) override; + void NotifyShadowTreeTransaction() override; + void UpdateRenderBounds(const gfx::IntRect& aRect) override; + + void InvalidateAll() override { + AddInvalidRegion(nsIntRegion(mRenderBounds)); + } + + LayerManagerMLGPU* AsLayerManagerMLGPU() override { return this; } + const char* Name() const override { return ""; } + + // This should only be called while a FrameBuilder is live. 
+ FrameBuilder* GetCurrentFrame() const { + MOZ_ASSERT(mCurrentFrame); + return mCurrentFrame; + } + MLGDevice* GetDevice() { return mDevice; } + + TimeStamp GetLastCompositionEndTime() const { + return mLastCompositionEndTime; + } + const nsIntRegion& GetRegionToClear() const { return mRegionToClear; } + uint32_t GetDebugFrameNumber() const { return mDebugFrameNumber; } + + private: + void Composite(); + void ComputeInvalidRegion(); + void RenderLayers(); + void DrawDebugOverlay(); + bool PreRender(); + void PostRender(); + + private: + RefPtr<MLGDevice> mDevice; + RefPtr<MLGSwapChain> mSwapChain; + RefPtr<TextureSourceProviderMLGPU> mTextureSourceProvider; + RefPtr<TextRenderer> mTextRenderer; + widget::CompositorWidget* mWidget; + + UniquePtr<LayerProperties> mClonedLayerTreeProperties; + nsIntRegion mNextFrameInvalidRegion; + gfx::IntRect mRenderBounds; + + // These are per-frame only. + bool mDrawDiagnostics; + bool mUsingInvalidation; + nsIntRegion mInvalidRegion; + Maybe<widget::WidgetRenderingContext> mWidgetContext; + + IntSize mWindowSize; + TimeStamp mCompositionStartTime; + TimeStamp mLastCompositionEndTime; + + RefPtr<DrawTarget> mTarget; + gfx::IntRect mTargetRect; + FrameBuilder* mCurrentFrame; + + // The debug frame number is incremented every frame and is included in the + // WorldConstants bound to vertex shaders. This allows us to correlate + // a frame in RenderDoc to spew in the console. + uint32_t mDebugFrameNumber; + RefPtr<MLGBuffer> mDiagnosticVertices; + + // Screenshotting for the profiler. + MLGPUScreenshotGrabber mProfilerScreenshotGrabber; +}; + +} // namespace layers +} // namespace mozilla + +#endif diff --git a/gfx/layers/mlgpu/MLGDevice.cpp b/gfx/layers/mlgpu/MLGDevice.cpp new file mode 100644 index 0000000000..ca6e35e49d --- /dev/null +++ b/gfx/layers/mlgpu/MLGDevice.cpp @@ -0,0 +1,348 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MLGDevice.h" +#include "mozilla/layers/TextureHost.h" +#include "BufferCache.h" +#include "ClearRegionHelper.h" +#include "gfxConfig.h" +#include "mozilla/StaticPrefs_layers.h" +#include "gfxUtils.h" +#include "ShaderDefinitionsMLGPU.h" +#include "SharedBufferMLGPU.h" +#include "UtilityMLGPU.h" + +namespace mozilla { +namespace layers { + +using namespace gfx; +using namespace mlg; + +MLGRenderTarget::MLGRenderTarget(MLGRenderTargetFlags aFlags) + : mFlags(aFlags), mLastDepthStart(-1) {} + +MLGSwapChain::MLGSwapChain() : mIsDoubleBuffered(false) {} + +bool MLGSwapChain::ApplyNewInvalidRegion( + nsIntRegion&& aRegion, const Maybe<gfx::IntRect>& aExtraRect) { + // We clamp the invalid region to the backbuffer size, otherwise the present + // can fail. + IntRect bounds(IntPoint(0, 0), GetSize()); + nsIntRegion invalid = std::move(aRegion); + invalid.AndWith(bounds); + if (invalid.IsEmpty()) { + return false; + } + + if (aExtraRect) { + IntRect rect = aExtraRect.value().Intersect(bounds); + if (!rect.IsEmpty()) { + invalid.OrWith(rect); + } + } + + // This area is now invalid in the back and front buffers. Note that the front + // buffer is either totally valid or totally invalid, since either the last + // paint succeeded or was thrown out due to a buffer resize. 
Effectively, it + // will now contain the invalid region specific to this frame. + mBackBufferInvalid.OrWith(invalid); + AL_LOG("Backbuffer invalid region: %s\n", + Stringify(mBackBufferInvalid).c_str()); + + if (mIsDoubleBuffered) { + mFrontBufferInvalid.OrWith(invalid); + AL_LOG("Frontbuffer invalid region: %s\n", + Stringify(mFrontBufferInvalid).c_str()); + } + return true; +} + +MLGDevice::MLGDevice() + : mTopology(MLGPrimitiveTopology::Unknown), + mInitialized(false), + mIsValid(false), + mCanUseClearView(false), + mCanUseConstantBufferOffsetBinding(false), + mMaxConstantBufferBindSize(0) {} + +MLGDevice::~MLGDevice() = default; + +bool MLGDevice::Initialize() { + if (!mMaxConstantBufferBindSize) { + return Fail("FEATURE_FAILURE_NO_MAX_CB_BIND_SIZE", + "Failed to set a max constant buffer bind size"); + } + if (mMaxConstantBufferBindSize < mlg::kMaxConstantBufferSize) { + // StagingBuffer depends on this value being accurate, so for now we just + // double-check it here. + return Fail("FEATURE_FAILURE_MIN_MAX_CB_BIND_SIZE", + "Minimum constant buffer bind size not met"); + } + + // We allow this to be pref'd off for testing. Switching it off enables + // Direct3D 11.0/Windows 7/OpenGL-style buffer code paths. + if (!StaticPrefs::layers_mlgpu_enable_buffer_sharing_AtStartup()) { + gfxConfig::EnableFallback(Fallback::NO_CONSTANT_BUFFER_OFFSETTING, + "Disabled by pref"); + mCanUseConstantBufferOffsetBinding = false; + } + if (mCanUseConstantBufferOffsetBinding && !VerifyConstantBufferOffsetting()) { + gfxConfig::EnableFallback(Fallback::NO_CONSTANT_BUFFER_OFFSETTING, + "Constant buffer offset binding does not work"); + mCanUseConstantBufferOffsetBinding = false; + } + + // We allow this to be pref'd off for testing. Disabling it turns on + // ID3D11DeviceContext1::ClearView support, which is present on + // newer Windows 8+ drivers. + if (!StaticPrefs::layers_mlgpu_enable_clear_view_AtStartup()) { + mCanUseClearView = false; + } + + // When compositing normal sized layer trees, we typically have small vertex + // buffers. Empirically the vertex and pixel constant buffer sizes are + // generally under 1KB and the vertex constant buffer size is under 8KB. + static const size_t kDefaultVertexBufferSize = 4096; + static const size_t kDefaultVSConstantBufferSize = + 512 * kConstantBufferElementSize; + static const size_t kDefaultPSConstantBufferSize = + 256 * kConstantBufferElementSize; + + // Note: we create these after we've verified all the device-specific + // properties above. 
+ mSharedVertexBuffer = + MakeUnique<SharedVertexBuffer>(this, kDefaultVertexBufferSize); + mSharedVSBuffer = + MakeUnique<SharedConstantBuffer>(this, kDefaultVSConstantBufferSize); + mSharedPSBuffer = + MakeUnique<SharedConstantBuffer>(this, kDefaultPSConstantBufferSize); + + if (!mSharedVertexBuffer->Init() || !mSharedVSBuffer->Init() || + !mSharedPSBuffer->Init()) { + return Fail("FEATURE_FAILURE_ALLOC_SHARED_BUFFER", + "Failed to allocate a shared shader buffer"); + } + + if (StaticPrefs::layers_mlgpu_enable_buffer_cache_AtStartup()) { + mConstantBufferCache = MakeUnique<BufferCache>(this); + } + + mInitialized = true; + mIsValid = true; + return true; +} + +void MLGDevice::BeginFrame() { + mSharedVertexBuffer->Reset(); + mSharedPSBuffer->Reset(); + mSharedVSBuffer->Reset(); +} + +void MLGDevice::EndFrame() { + if (mConstantBufferCache) { + mConstantBufferCache->EndFrame(); + } +} + +void MLGDevice::FinishSharedBufferUse() { + mSharedVertexBuffer->PrepareForUsage(); + mSharedPSBuffer->PrepareForUsage(); + mSharedVSBuffer->PrepareForUsage(); +} + +void MLGDevice::SetTopology(MLGPrimitiveTopology aTopology) { + if (mTopology == aTopology) { + return; + } + SetPrimitiveTopology(aTopology); + mTopology = aTopology; +} + +void MLGDevice::SetVertexBuffer(uint32_t aSlot, + const VertexBufferSection* aSection) { + if (!aSection->IsValid()) { + return; + } + SetVertexBuffer(aSlot, aSection->GetBuffer(), aSection->Stride(), + aSection->Offset()); +} + +void MLGDevice::SetPSConstantBuffer(uint32_t aSlot, + const ConstantBufferSection* aSection) { + if (!aSection->IsValid()) { + return; + } + + MLGBuffer* buffer = aSection->GetBuffer(); + + if (aSection->HasOffset()) { + uint32_t first = aSection->Offset(); + uint32_t numConstants = aSection->NumConstants(); + SetPSConstantBuffer(aSlot, buffer, first, numConstants); + } else { + SetPSConstantBuffer(aSlot, buffer); + } +} + +void MLGDevice::SetVSConstantBuffer(uint32_t aSlot, + const ConstantBufferSection* aSection) { + if (!aSection->IsValid()) { + return; + } + + MLGBuffer* buffer = aSection->GetBuffer(); + + if (aSection->HasOffset()) { + uint32_t first = aSection->Offset(); + uint32_t numConstants = aSection->NumConstants(); + SetVSConstantBuffer(aSlot, buffer, first, numConstants); + } else { + SetVSConstantBuffer(aSlot, buffer); + } +} + +void MLGDevice::SetPSTexturesYUV(uint32_t aSlot, TextureSource* aTexture) { + // Note, we don't support tiled YCbCr textures. + const int Y = 0, Cb = 1, Cr = 2; + TextureSource* textures[3] = {aTexture->GetSubSource(Y), + aTexture->GetSubSource(Cb), + aTexture->GetSubSource(Cr)}; + MOZ_ASSERT(textures[0]); + MOZ_ASSERT(textures[1]); + MOZ_ASSERT(textures[2]); + + SetPSTextures(0, 3, textures); +} + +void MLGDevice::SetPSTexture(uint32_t aSlot, TextureSource* aSource) { + SetPSTextures(aSlot, 1, &aSource); +} + +void MLGDevice::SetSamplerMode(uint32_t aIndex, gfx::SamplingFilter aFilter) { + SetSamplerMode(aIndex, FilterToSamplerMode(aFilter)); +} + +bool MLGDevice::Fail(const nsCString& aFailureId, const nsCString* aMessage) { + const char* message = + aMessage ? 
aMessage->get() : "Failed initializing MLGDeviceD3D11"; + gfxWarning() << "Failure initializing MLGDeviceD3D11: " << message; + mFailureId = aFailureId; + mFailureMessage = message; + return false; +} + +void MLGDevice::UnmapSharedBuffers() { + mSharedVertexBuffer->Reset(); + mSharedPSBuffer->Reset(); + mSharedVSBuffer->Reset(); +} + +RefPtr<MLGBuffer> MLGDevice::GetBufferForColorSpace(YUVColorSpace aColorSpace) { + if (mColorSpaceBuffers[aColorSpace]) { + return mColorSpaceBuffers[aColorSpace]; + } + + YCbCrShaderConstants buffer; + memcpy(&buffer.yuvColorMatrix, + gfxUtils::YuvToRgbMatrix4x3RowMajor(aColorSpace), + sizeof(buffer.yuvColorMatrix)); + + RefPtr<MLGBuffer> resource = CreateBuffer( + MLGBufferType::Constant, sizeof(buffer), MLGUsage::Immutable, &buffer); + if (!resource) { + return nullptr; + } + + mColorSpaceBuffers[aColorSpace] = resource; + return resource; +} + +RefPtr<MLGBuffer> MLGDevice::GetBufferForColorDepthCoefficient( + ColorDepth aColorDepth) { + if (mColorDepthBuffers[aColorDepth]) { + return mColorDepthBuffers[aColorDepth]; + } + + YCbCrColorDepthConstants buffer; + buffer.coefficient = gfx::RescalingFactorForColorDepth(aColorDepth); + + RefPtr<MLGBuffer> resource = CreateBuffer( + MLGBufferType::Constant, sizeof(buffer), MLGUsage::Immutable, &buffer); + if (!resource) { + return nullptr; + } + + mColorDepthBuffers[aColorDepth] = resource; + return resource; +} + +bool MLGDevice::Synchronize() { return true; } + +void MLGDevice::PrepareClearRegion(ClearRegionHelper* aOut, + nsTArray<gfx::IntRect>&& aRects, + const Maybe<int32_t>& aSortIndex) { + if (CanUseClearView() && !aSortIndex) { + aOut->mRects = std::move(aRects); + return; + } + + mSharedVertexBuffer->Allocate(&aOut->mInput, aRects.Length(), sizeof(IntRect), + aRects.Elements()); + + ClearConstants consts(aSortIndex ? aSortIndex.value() : 1); + mSharedVSBuffer->Allocate(&aOut->mVSBuffer, consts); +} + +void MLGDevice::DrawClearRegion(const ClearRegionHelper& aHelper) { + // If we've set up vertices for a shader-based clear, execute that now. + if (aHelper.mInput.IsValid()) { + SetTopology(MLGPrimitiveTopology::UnitQuad); + SetVertexShader(VertexShaderID::Clear); + SetVertexBuffer(1, &aHelper.mInput); + SetVSConstantBuffer(kClearConstantBufferSlot, &aHelper.mVSBuffer); + SetBlendState(MLGBlendState::Copy); + SetPixelShader(PixelShaderID::Clear); + DrawInstanced(4, aHelper.mInput.NumVertices(), 0, 0); + return; + } + + // Otherwise, if we have a normal rect list, we wanted to use the faster + // ClearView. 
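PrepareClearRegion() and DrawClearRegion() are meant to be used as a pair: the rects are recorded while the frame is being built (the vertex and constant data land in the shared buffers), and the clear is replayed later while the matching render target is bound. A rough usage fragment; the helper is assumed to be default-constructible and device is a placeholder:

ClearRegionHelper helper;
nsTArray<gfx::IntRect> rects;
rects.AppendElement(gfx::IntRect(0, 0, 256, 256));

// Frame building, before FinishSharedBufferUse(): stash the rects and an
// optional depth sort index.
device->PrepareClearRegion(&helper, std::move(rects), Nothing());

// Drawing, with the matching render target set: replay the clear. This may
// switch shaders, so callers re-establish their own state afterwards.
device->DrawClearRegion(helper);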
+ if (!aHelper.mRects.IsEmpty()) { + DeviceColor color(0.0, 0.0, 0.0, 0.0); + ClearView(mCurrentRT, color, aHelper.mRects.Elements(), + aHelper.mRects.Length()); + } +} + +void MLGDevice::WriteAsPNG(MLGTexture* aTexture, const char* aPath) { + MLGMappedResource map; + if (!Map(aTexture, MLGMapType::READ, &map)) { + return; + } + + RefPtr<DataSourceSurface> surface = Factory::CreateWrappingDataSourceSurface( + map.mData, map.mStride, aTexture->GetSize(), SurfaceFormat::B8G8R8A8); + gfxUtils::WriteAsPNG(surface, aPath); + + Unmap(aTexture); +} + +RefPtr<MLGTexture> MLGDevice::CopyAndCreateReadbackTexture( + MLGTexture* aTexture) { + RefPtr<MLGTexture> copy = + CreateTexture(aTexture->GetSize(), SurfaceFormat::B8G8R8A8, + MLGUsage::Staging, MLGTextureFlags::None); + if (!copy) { + return nullptr; + } + CopyTexture(copy, IntPoint(0, 0), aTexture, + IntRect(IntPoint(0, 0), aTexture->GetSize())); + return copy; +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/MLGDevice.h b/gfx/layers/mlgpu/MLGDevice.h new file mode 100644 index 0000000000..a8e49add2c --- /dev/null +++ b/gfx/layers/mlgpu/MLGDevice.h @@ -0,0 +1,481 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_MLGDevice_h +#define mozilla_gfx_layers_mlgpu_MLGDevice_h + +#include "mozilla/Assertions.h" // for MOZ_ASSERT, etc +#include "mozilla/EnumeratedArray.h" +#include "mozilla/RefPtr.h" // for already_AddRefed, RefCounted +#include "mozilla/TypedEnumBits.h" +#include "mozilla/WidgetUtils.h" +#include "mozilla/gfx/Types.h" +#include "mozilla/layers/CompositorTypes.h" +#include "mozilla/layers/LayersTypes.h" +#include "ImageTypes.h" +#include "MLGDeviceTypes.h" +#include "nsISupportsImpl.h" +#include "nsString.h" +#include "nsPrintfCString.h" + +namespace mozilla { + +namespace widget { +class CompositorWidget; +} // namespace widget +namespace gfx { +class DrawTarget; +} // namespace gfx + +namespace layers { + +struct GPUStats; +class BufferCache; +class ConstantBufferSection; +class DataTextureSource; +class MLGBufferD3D11; +class MLGDeviceD3D11; +class MLGRenderTargetD3D11; +class MLGResourceD3D11; +class MLGTexture; +class MLGTextureD3D11; +class SharedVertexBuffer; +class SharedConstantBuffer; +class TextureSource; +class VertexBufferSection; +struct ClearRegionHelper; + +class MLGRenderTarget { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MLGRenderTarget) + + virtual gfx::IntSize GetSize() const = 0; + virtual MLGRenderTargetD3D11* AsD3D11() { return nullptr; } + + // Returns the underlying texture of the render target. + virtual MLGTexture* GetTexture() = 0; + + bool HasDepthBuffer() const { + return (mFlags & MLGRenderTargetFlags::ZBuffer) == + MLGRenderTargetFlags::ZBuffer; + } + + int32_t GetLastDepthStart() const { return mLastDepthStart; } + void SetLastDepthStart(int32_t aDepthStart) { mLastDepthStart = aDepthStart; } + + protected: + explicit MLGRenderTarget(MLGRenderTargetFlags aFlags); + virtual ~MLGRenderTarget() = default; + + protected: + MLGRenderTargetFlags mFlags; + + // When using a depth buffer, callers can track the range of depth values + // that were last used. 
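CopyAndCreateReadbackTexture() and WriteAsPNG() above combine into a convenient debugging dump for any GPU texture, at the cost of a staging copy and a synchronous map. A short sketch; the texture and output path are arbitrary examples:

// Copy the (non-mappable) texture into a staging texture, then write it out.
RefPtr<MLGTexture> readback = device->CopyAndCreateReadbackTexture(texture);
if (readback) {
  device->WriteAsPNG(readback, "/tmp/mlgpu-frame.png");
}

WriteAsPNG() maps its argument for reading, so it has to be given a Staging texture like the one produced here.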
+ int32_t mLastDepthStart; +}; + +class MLGSwapChain { + protected: + virtual ~MLGSwapChain() = default; + + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MLGSwapChain) + + virtual RefPtr<MLGRenderTarget> AcquireBackBuffer() = 0; + virtual bool ResizeBuffers(const gfx::IntSize& aSize) = 0; + virtual gfx::IntSize GetSize() const = 0; + + // Present to the screen. + virtual void Present() = 0; + + // Force a present without waiting for the previous frame's present to + // complete. + virtual void ForcePresent() = 0; + + // Copy an area of the backbuffer to a draw target. + virtual void CopyBackbuffer(gfx::DrawTarget* aTarget, + const gfx::IntRect& aBounds) = 0; + + // Free any internal resources. + virtual void Destroy() = 0; + + // Give the new invalid region to the swap chain in preparation for + // acquiring the backbuffer. If the new invalid region is empty, + // this returns false and no composite is required. + // + // The extra rect is used for the debug overlay, which is factored in + // separately to avoid causing unnecessary composites. + bool ApplyNewInvalidRegion(nsIntRegion&& aRegion, + const Maybe<gfx::IntRect>& aExtraRect); + + const nsIntRegion& GetBackBufferInvalidRegion() const { + return mBackBufferInvalid; + } + + protected: + MLGSwapChain(); + + protected: + gfx::IntSize mLastPresentSize; + // The swap chain tracks the invalid region of its buffers. After presenting, + // the invalid region for the backbuffer is cleared. If using double + // buffering, it is set to the area of the non-presented buffer that was not + // painted this frame. The initial invalid region each frame comes from + // LayerManagerMLGPU, and is combined with the back buffer's invalid region + // before frame building begins. + nsIntRegion mBackBufferInvalid; + nsIntRegion mFrontBufferInvalid; + bool mIsDoubleBuffered; +}; + +class MLGResource { + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MLGResource) + + public: + enum class Type { Buffer, Texture }; + + virtual Type GetType() const = 0; + virtual MLGResourceD3D11* AsResourceD3D11() { return nullptr; } + + protected: + virtual ~MLGResource() = default; +}; + +// A buffer for use as a shader input. +class MLGBuffer : public MLGResource { + public: + Type GetType() const override { return Type::Buffer; } + virtual MLGBufferD3D11* AsD3D11() { return nullptr; } + virtual size_t GetSize() const = 0; + + protected: + virtual ~MLGBuffer() = default; +}; + +// This is a lower-level resource than a TextureSource. It wraps +// a 2D texture. 
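Read together with LayerManagerMLGPU::Composite() earlier in this patch, the per-frame MLGSwapChain contract looks roughly like the fragment below. In the real code the backbuffer is acquired and bound by the FrameBuilder/RenderView machinery rather than directly, so treat this as an outline with placeholder locals, not the actual call sequence:

// 1. Hand the swap chain this frame's invalid region; if nothing ends up
//    invalid, the whole composite can be skipped.
if (!swapChain->ApplyNewInvalidRegion(std::move(invalidRegion), Nothing())) {
  return;
}

// 2. Render into the backbuffer.
RefPtr<MLGRenderTarget> backBuffer = swapChain->AcquireBackBuffer();
device->SetRenderTarget(backBuffer);
// ... draw the frame ...

// 3. Optionally read the result back (only when snapshotting), then present.
swapChain->CopyBackbuffer(drawTarget, bounds);
swapChain->Present();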
+class MLGTexture : public MLGResource { + public: + Type GetType() const override { return Type::Texture; } + virtual MLGTextureD3D11* AsD3D11() { return nullptr; } + const gfx::IntSize& GetSize() const { return mSize; } + + protected: + gfx::IntSize mSize; +}; + +enum class VertexShaderID { + TexturedQuad, + TexturedVertex, + ColoredQuad, + ColoredVertex, + BlendVertex, + Clear, + MaskCombiner, + DiagnosticText, + MaxShaders +}; + +enum class PixelShaderID { + ColoredQuad, + ColoredVertex, + TexturedQuadRGB, + TexturedQuadRGBA, + TexturedVertexRGB, + TexturedVertexRGBA, + TexturedQuadIMC4, + TexturedQuadIdentityIMC4, + TexturedQuadNV12, + TexturedVertexIMC4, + TexturedVertexIdentityIMC4, + TexturedVertexNV12, + ComponentAlphaQuad, + ComponentAlphaVertex, + BlendMultiply, + BlendScreen, + BlendOverlay, + BlendDarken, + BlendLighten, + BlendColorDodge, + BlendColorBurn, + BlendHardLight, + BlendSoftLight, + BlendDifference, + BlendExclusion, + BlendHue, + BlendSaturation, + BlendColor, + BlendLuminosity, + Clear, + MaskCombiner, + DiagnosticText, + MaxShaders +}; + +class MLGDevice { + public: + NS_INLINE_DECL_THREADSAFE_REFCOUNTING(MLGDevice) + + MLGDevice(); + + virtual bool Initialize(); + + virtual TextureFactoryIdentifier GetTextureFactoryIdentifier( + widget::CompositorWidget* aWidget) const = 0; + virtual int32_t GetMaxTextureSize() const = 0; + virtual LayersBackend GetLayersBackend() const = 0; + + virtual RefPtr<MLGSwapChain> CreateSwapChainForWidget( + widget::CompositorWidget* aWidget) = 0; + + // Markers for when we start and finish issuing "normal" (i.e., non- + // diagnostic) draw commands for the frame. + virtual void StartDiagnostics(uint32_t aInvalidPixels) = 0; + virtual void EndDiagnostics() = 0; + virtual void GetDiagnostics(GPUStats* aStats) = 0; + + // Layers interaction. + virtual RefPtr<DataTextureSource> CreateDataTextureSource( + TextureFlags aFlags) = 0; + + // Resource access + virtual bool Map(MLGResource* aResource, MLGMapType aType, + MLGMappedResource* aMap) = 0; + virtual void Unmap(MLGResource* aResource) = 0; + virtual void UpdatePartialResource(MLGResource* aResource, + const gfx::IntRect* aRect, void* aData, + uint32_t aStride) = 0; + virtual void CopyTexture(MLGTexture* aDest, const gfx::IntPoint& aTarget, + MLGTexture* aSource, const gfx::IntRect& aRect) = 0; + + // Begin a frame. This clears and resets all shared buffers. + virtual void BeginFrame(); + virtual void EndFrame(); + + // State setup commands. 
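The Map()/Unmap() pair declared above is how CPU data reaches mappable resources; per the MLGUsage documentation in MLGDeviceTypes.h further down, Dynamic resources must be mapped with WRITE_DISCARD. A sketch of a whole-buffer update, with aBytes and aData as placeholders (a partial update would use UpdatePartialResource() instead):

RefPtr<MLGBuffer> buffer = device->CreateBuffer(
    MLGBufferType::Vertex, aBytes, MLGUsage::Dynamic, nullptr);

MLGMappedResource map;
if (buffer && device->Map(buffer, MLGMapType::WRITE_DISCARD, &map)) {
  // WRITE_DISCARD hands back fresh memory, so the whole buffer is rewritten.
  memcpy(map.mData, aData, aBytes);
  device->Unmap(buffer);
}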
+ virtual void SetRenderTarget(MLGRenderTarget* aRT) = 0; + virtual MLGRenderTarget* GetRenderTarget() = 0; + virtual void SetViewport(const gfx::IntRect& aRT) = 0; + virtual void SetScissorRect(const Maybe<gfx::IntRect>& aScissorRect) = 0; + virtual void SetVertexShader(VertexShaderID aVertexShader) = 0; + virtual void SetPixelShader(PixelShaderID aPixelShader) = 0; + virtual void SetSamplerMode(uint32_t aIndex, SamplerMode aSamplerMode) = 0; + virtual void SetBlendState(MLGBlendState aBlendState) = 0; + virtual void SetVertexBuffer(uint32_t aSlot, MLGBuffer* aBuffer, + uint32_t aStride, uint32_t aOffset = 0) = 0; + virtual void SetVSConstantBuffer(uint32_t aSlot, MLGBuffer* aBuffer) = 0; + virtual void SetPSConstantBuffer(uint32_t aSlot, MLGBuffer* aBuffer) = 0; + virtual void SetPSTextures(uint32_t aSlot, uint32_t aNumTextures, + TextureSource* const* aTextures) = 0; + virtual void SetPSTexture(uint32_t aSlot, MLGTexture* aTexture) = 0; + virtual void SetDepthTestMode(MLGDepthTestMode aMode) = 0; + + // If supported, bind constant buffers at a particular offset. These can only + // be used if CanUseConstantBufferOffsetBinding returns true. + virtual void SetVSConstantBuffer(uint32_t aSlot, MLGBuffer* aBuffer, + uint32_t aFirstConstant, + uint32_t aNumConstants) = 0; + virtual void SetPSConstantBuffer(uint32_t aSlot, MLGBuffer* aBuffer, + uint32_t aFirstConstant, + uint32_t aNumConstants) = 0; + + // Set the topology. No API call is made if the topology has not changed. + // The UnitQuad topology implicity binds a unit quad triangle strip as + // vertex buffer #0. + void SetTopology(MLGPrimitiveTopology aTopology); + + // Set textures that have special binding logic, and bind to multiple slots. + virtual void SetPSTexturesNV12(uint32_t aSlot, TextureSource* aTexture) = 0; + void SetPSTexturesYUV(uint32_t aSlot, TextureSource* aTexture); + + virtual RefPtr<MLGBuffer> CreateBuffer( + MLGBufferType aType, uint32_t aSize, MLGUsage aUsage, + const void* aInitialData = nullptr) = 0; + + virtual RefPtr<MLGTexture> CreateTexture(const gfx::IntSize& aSize, + gfx::SurfaceFormat aFormat, + MLGUsage aUsage, + MLGTextureFlags aFlags) = 0; + + // Unwrap the underlying GPU texture in the given TextureSource, and re-wrap + // it in an MLGTexture structure. + virtual RefPtr<MLGTexture> CreateTexture(TextureSource* aSource) = 0; + + virtual RefPtr<MLGRenderTarget> CreateRenderTarget( + const gfx::IntSize& aSize, + MLGRenderTargetFlags aFlags = MLGRenderTargetFlags::Default) = 0; + + // Clear a render target to the given color, or clear a depth buffer. + virtual void Clear(MLGRenderTarget* aRT, const gfx::DeviceColor& aColor) = 0; + virtual void ClearDepthBuffer(MLGRenderTarget* aRT) = 0; + + // This is only available if CanUseClearView() returns true. + virtual void ClearView(MLGRenderTarget* aRT, const gfx::DeviceColor& aColor, + const gfx::IntRect* aRects, size_t aNumRects) = 0; + + // Drawing Commands + virtual void Draw(uint32_t aVertexCount, uint32_t aOffset) = 0; + virtual void DrawInstanced(uint32_t aVertexCountPerInstance, + uint32_t aInstanceCount, uint32_t aVertexOffset, + uint32_t aInstanceOffset) = 0; + virtual void Flush() = 0; + + // This unlocks any textures that were implicitly locked during drawing. + virtual void UnlockAllTextures() = 0; + + virtual MLGDeviceD3D11* AsD3D11() { return nullptr; } + + // Helpers. 
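Most draws in this backend follow the same shape that DrawDebugOverlay() and the screenshot blit use: UnitQuad topology (which implicitly provides the quad in vertex slot 0), a per-instance vertex buffer in slot 1, shader, blend and sampler state, then one instanced draw of the 4-vertex quad. Condensed into a single fragment; instanceBuffer, InstanceData and numInstances are placeholders:

device->SetTopology(MLGPrimitiveTopology::UnitQuad);  // implicit quad, slot 0
device->SetVertexShader(VertexShaderID::DiagnosticText);
device->SetPixelShader(PixelShaderID::DiagnosticText);
device->SetBlendState(MLGBlendState::Over);
device->SetVertexBuffer(1, instanceBuffer, sizeof(InstanceData));
device->SetPSTexture(0, texture);
device->SetSamplerMode(0, SamplerMode::Point);
// 4 vertices per instance, one instance per rect in the instance buffer.
device->DrawInstanced(4, numInstances, 0, 0);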
+ void SetVertexBuffer(uint32_t aSlot, const VertexBufferSection* aSection); + void SetPSConstantBuffer(uint32_t aSlot, + const ConstantBufferSection* aSection); + void SetVSConstantBuffer(uint32_t aSlot, + const ConstantBufferSection* aSection); + void SetPSTexture(uint32_t aSlot, TextureSource* aSource); + void SetSamplerMode(uint32_t aIndex, gfx::SamplingFilter aFilter); + + // This creates or returns a previously created constant buffer, containing + // a YCbCrShaderConstants instance. + RefPtr<MLGBuffer> GetBufferForColorSpace(gfx::YUVColorSpace aColorSpace); + // This creates or returns a previously created constant buffer, containing + // a YCbCrBitDepthConstants instance. + RefPtr<MLGBuffer> GetBufferForColorDepthCoefficient( + gfx::ColorDepth aColorDepth); + + // A shared buffer that can be used to build VertexBufferSections. + SharedVertexBuffer* GetSharedVertexBuffer() { + return mSharedVertexBuffer.get(); + } + // A shared buffer that can be used to build ConstantBufferSections. Intended + // to be used with vertex shaders. + SharedConstantBuffer* GetSharedVSBuffer() { return mSharedVSBuffer.get(); } + // A shared buffer that can be used to build ConstantBufferSections. Intended + // to be used with pixel shaders. + SharedConstantBuffer* GetSharedPSBuffer() { return mSharedPSBuffer.get(); } + // A cache for constant buffers, used when offset-based binding is not + // supported. + BufferCache* GetConstantBufferCache() { return mConstantBufferCache.get(); } + + // Unmap and upload all shared buffers to the GPU. + void FinishSharedBufferUse(); + + // These are used to detect and report initialization failure. + virtual bool IsValid() const { return mInitialized && mIsValid; } + const nsCString& GetFailureId() const { return mFailureId; } + const nsCString& GetFailureMessage() const { return mFailureMessage; } + + // Prepare a clear-region operation to be run at a later time. + void PrepareClearRegion(ClearRegionHelper* aOut, + nsTArray<gfx::IntRect>&& aRects, + const Maybe<int32_t>& aSortIndex); + + // Execute a clear-region operation. This may change shader state. + void DrawClearRegion(const ClearRegionHelper& aHelper); + + // If supported, synchronize with the SyncObject given to clients. + virtual bool Synchronize(); + + // If this returns true, ClearView() can be called. + bool CanUseClearView() const { return mCanUseClearView; } + + // If this returns true, constant buffers can be bound at specific offsets for + // a given run of bytes. This is only supported on Windows 8+ for Direct3D 11. + bool CanUseConstantBufferOffsetBinding() const { + return mCanUseConstantBufferOffsetBinding; + } + + // Return the maximum number of elements that can be bound to a constant + // buffer. This is different than the maximum size of a buffer (there is + // no such limit on Direct3D 11.1). + // + // The return value must be a power of two. + size_t GetMaxConstantBufferBindSize() const { + return mMaxConstantBufferBindSize; + } + + // Helper function for unbinding textures since SetPSTexture is overloaded. + void UnsetPSTexture(uint32_t aSlot) { + TextureSource* nullTexture = nullptr; + SetPSTexture(aSlot, nullTexture); + } + + // Debugging helper function for dumping an MLGTexture to a file. + void WriteAsPNG(MLGTexture* aTexture, const char* aPath); + + // Debugging helper function for copying a texture for later dumping to a + // file. 
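The shared-buffer accessors above are part of a fixed per-frame lifecycle: the buffers are reset in BeginFrame(), filled by Allocate() calls while the frame is built, uploaded in one go by FinishSharedBufferUse(), and their sections are then bound per draw; EndFrame() finally ages the constant-buffer cache. A condensed ordering sketch; myConstants and aSlot are placeholders, and the real sequence is spread across LayerManagerMLGPU and FrameBuilder:

device->BeginFrame();                    // resets the shared buffers

// Frame building: record where this draw's constants will live.
ConstantBufferSection vsSection;
device->GetSharedVSBuffer()->Allocate(&vsSection, myConstants);

device->FinishSharedBufferUse();         // unmap and upload everything at once

// Drawing: bind the section (offset bind or whole-buffer bind, decided by
// the helper overloads above) and issue the draw that consumes it.
device->SetVSConstantBuffer(aSlot, &vsSection);
device->DrawInstanced(4, 1, 0, 0);

device->EndFrame();                      // ages the constant-buffer cache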
+ RefPtr<MLGTexture> CopyAndCreateReadbackTexture(MLGTexture* aTexture); + + protected: + virtual ~MLGDevice(); + + virtual void SetPrimitiveTopology(MLGPrimitiveTopology aTopology) = 0; + + // Optionally run a runtime test to determine if constant buffer offset + // binding works. + virtual bool VerifyConstantBufferOffsetting() { return true; } + + // Used during initialization to record failure reasons. + bool Fail(const nsCString& aFailureId, const nsCString* aMessage); + + // Used during initialization to record failure reasons. Note: our + // MOZ_FORMAT_PRINTF macro does not work on this function, so we + // disable the warning. +#if defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wformat-security" +#endif + template <typename... T> + bool Fail(const char* aFailureId) { + nsCString failureId(aFailureId); + return Fail(failureId, nullptr); + } + template <typename... T> + bool Fail(const char* aFailureId, const char* aMessage, const T&... args) { + nsCString failureId(aFailureId); + nsPrintfCString message(aMessage, args...); + return Fail(failureId, &message); + } +#if defined(__GNUC__) +# pragma GCC diagnostic pop +#endif + + void UnmapSharedBuffers(); + + private: + MLGPrimitiveTopology mTopology; + UniquePtr<SharedVertexBuffer> mSharedVertexBuffer; + UniquePtr<SharedConstantBuffer> mSharedVSBuffer; + UniquePtr<SharedConstantBuffer> mSharedPSBuffer; + UniquePtr<BufferCache> mConstantBufferCache; + + nsCString mFailureId; + nsCString mFailureMessage; + bool mInitialized; + + typedef EnumeratedArray<gfx::YUVColorSpace, gfx::YUVColorSpace::UNKNOWN, + RefPtr<MLGBuffer>> + ColorSpaceArray; + ColorSpaceArray mColorSpaceBuffers; + typedef EnumeratedArray<gfx::ColorDepth, gfx::ColorDepth::UNKNOWN, + RefPtr<MLGBuffer>> + ColorDepthArray; + ColorDepthArray mColorDepthBuffers; + + protected: + bool mIsValid; + bool mCanUseClearView; + bool mCanUseConstantBufferOffsetBinding; + size_t mMaxConstantBufferBindSize; + + RefPtr<MLGRenderTarget> mCurrentRT; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_MLGDevice_h diff --git a/gfx/layers/mlgpu/MLGDeviceTypes.h b/gfx/layers/mlgpu/MLGDeviceTypes.h new file mode 100644 index 0000000000..3af46e6a36 --- /dev/null +++ b/gfx/layers/mlgpu/MLGDeviceTypes.h @@ -0,0 +1,102 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_MLGDeviceTypes_h +#define mozilla_gfx_layers_mlgpu_MLGDeviceTypes_h + +#include "mozilla/TypedEnumBits.h" +#include "mozilla/gfx/Types.h" + +namespace mozilla { +namespace layers { + +enum class MLGUsage { + // GPU read-only, CPU write once on creation and read/write never. + Immutable, + + // GPU read-only, CPU write-only. Must be mapped with WRITE_DISCARD. + Dynamic, + + // GPU read/write-only, no CPU access. + Default, + + // GPU->CPU transfer, and read from the CPU. + Staging +}; + +enum class MLGDepthTestMode { + Disabled, + Write, + ReadOnly, + AlwaysWrite, + MaxModes +}; + +enum class MLGBufferType : uint32_t { Vertex, Constant }; + +enum class SamplerMode { + // Linear filter, clamped to border. + LinearClamp = 0, + // Linear filter, clamped to transparent pixels. + LinearClampToZero, + // Linear filter, wrap edges. 
+ LinearRepeat, + // Point filter, clamped to border. + Point, + MaxModes +}; + +enum class MLGBlendState { + Copy = 0, + Over, + OverAndPremultiply, + Min, + ComponentAlpha, + MaxStates +}; + +enum class MLGPrimitiveTopology { + Unknown = 0, + TriangleStrip = 1, + TriangleList = 2, + UnitQuad = 3, + UnitTriangle = 4 +}; + +struct MLGMappedResource { + uint8_t* mData; + uint32_t mStride; +}; + +enum class MLGMapType { READ = 0, WRITE, READ_WRITE, WRITE_DISCARD }; + +enum class MLGTextureFlags { None, ShaderResource, RenderTarget }; +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(MLGTextureFlags); + +enum class MLGRenderTargetFlags : uint32_t { Default = 0, ZBuffer = (1 << 0) }; +MOZ_MAKE_ENUM_CLASS_BITWISE_OPERATORS(MLGRenderTargetFlags); + +// NVIDIA drivers crash when we supply too many rects to ClearView - it +// seems to cause a stack overflow >= 20 rects. We cap to 12 for now. +static const size_t kMaxClearViewRects = 12; + +static inline SamplerMode FilterToSamplerMode(gfx::SamplingFilter aFilter) { + switch (aFilter) { + case gfx::SamplingFilter::POINT: + return SamplerMode::Point; + case gfx::SamplingFilter::LINEAR: + case gfx::SamplingFilter::GOOD: + return SamplerMode::LinearClamp; + default: + MOZ_ASSERT_UNREACHABLE("Unknown sampler mode"); + return SamplerMode::LinearClamp; + } +} + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_MLGDeviceTypes_h diff --git a/gfx/layers/mlgpu/MLGPUScreenshotGrabber.cpp b/gfx/layers/mlgpu/MLGPUScreenshotGrabber.cpp new file mode 100644 index 0000000000..01ca9dbf9c --- /dev/null +++ b/gfx/layers/mlgpu/MLGPUScreenshotGrabber.cpp @@ -0,0 +1,336 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "MLGPUScreenshotGrabber.h" + +#include "mozilla/RefPtr.h" +#include "mozilla/TimeStamp.h" +#include "mozilla/UniquePtr.h" + +#include "mozilla/layers/ProfilerScreenshots.h" +#include "mozilla/gfx/Point.h" +#include "mozilla/gfx/Swizzle.h" +#include "mozilla/ProfilerMarkers.h" +#include "SharedBufferMLGPU.h" +#include "ShaderDefinitionsMLGPU.h" +#include "nsTArray.h" + +namespace mozilla { + +using namespace gfx; + +namespace layers { + +using namespace mlg; + +/** + * The actual implementation of screenshot grabbing. + * The MLGPUScreenshotGrabberImpl object is destroyed if the profiler is + * disabled and MaybeGrabScreenshot notices it. 
+ */ +class MLGPUScreenshotGrabberImpl final { + public: + explicit MLGPUScreenshotGrabberImpl(const IntSize& aReadbackTextureSize); + ~MLGPUScreenshotGrabberImpl(); + + void GrabScreenshot(MLGDevice* aDevice, MLGTexture* aTexture); + void ProcessQueue(); + + private: + struct QueueItem final { + mozilla::TimeStamp mTimeStamp; + RefPtr<MLGTexture> mScreenshotReadbackTexture; + gfx::IntSize mScreenshotSize; + gfx::IntSize mWindowSize; + RefPtr<MLGDevice> mDevice; + uintptr_t mWindowIdentifier; + }; + + RefPtr<MLGTexture> ScaleDownWindowTargetToSize(MLGDevice* aCompositor, + const gfx::IntSize& aDestSize, + MLGTexture* aWindowTarget, + size_t aLevel); + + struct CachedLevel { + RefPtr<MLGRenderTarget> mRenderTarget; + RefPtr<MLGBuffer> mVertexBuffer; + RefPtr<MLGBuffer> mWorldConstants; + }; + bool BlitTexture(MLGDevice* aDevice, CachedLevel& aDest, MLGTexture* aSource, + const IntSize& aSourceSize, const IntSize& aDestSize); + + already_AddRefed<MLGTexture> TakeNextReadbackTexture(MLGDevice* aCompositor); + void ReturnReadbackTexture(MLGTexture* aReadbackTexture); + + nsTArray<CachedLevel> mCachedLevels; + nsTArray<RefPtr<MLGTexture>> mAvailableReadbackTextures; + Maybe<QueueItem> mCurrentFrameQueueItem; + nsTArray<QueueItem> mQueue; + RefPtr<ProfilerScreenshots> mProfilerScreenshots; + const IntSize mReadbackTextureSize; +}; + +MLGPUScreenshotGrabber::MLGPUScreenshotGrabber() = default; + +MLGPUScreenshotGrabber::~MLGPUScreenshotGrabber() = default; + +void MLGPUScreenshotGrabber::MaybeGrabScreenshot(MLGDevice* aDevice, + MLGTexture* aTexture) { + if (ProfilerScreenshots::IsEnabled()) { + if (!mImpl) { + mImpl = MakeUnique<MLGPUScreenshotGrabberImpl>( + ProfilerScreenshots::ScreenshotSize()); + } + mImpl->GrabScreenshot(aDevice, aTexture); + } else if (mImpl) { + Destroy(); + } +} + +void MLGPUScreenshotGrabber::MaybeProcessQueue() { + if (ProfilerScreenshots::IsEnabled()) { + if (!mImpl) { + mImpl = MakeUnique<MLGPUScreenshotGrabberImpl>( + ProfilerScreenshots::ScreenshotSize()); + } + mImpl->ProcessQueue(); + } else if (mImpl) { + Destroy(); + } +} + +void MLGPUScreenshotGrabber::NotifyEmptyFrame() { +#ifdef MOZ_GECKO_PROFILER + PROFILER_MARKER_UNTYPED("NoCompositorScreenshot because nothing changed", + GRAPHICS); +#endif +} + +void MLGPUScreenshotGrabber::Destroy() { mImpl = nullptr; } + +MLGPUScreenshotGrabberImpl::MLGPUScreenshotGrabberImpl( + const IntSize& aReadbackTextureSize) + : mReadbackTextureSize(aReadbackTextureSize) {} + +MLGPUScreenshotGrabberImpl::~MLGPUScreenshotGrabberImpl() { + // Any queue items in mQueue or mCurrentFrameQueueItem will be lost. + // That's ok: Either the profiler has stopped and we don't care about these + // screenshots, or the window is closing and we don't really need the last + // few frames from the window. +} + +// Scale down aWindowTexture into a MLGTexture of size +// mReadbackTextureSize * (1 << aLevel) and return that MLGTexture. +// Don't scale down by more than a factor of 2 with a single scaling operation, +// because it'll look bad. If higher scales are needed, use another +// intermediate target by calling this function recursively with aLevel + 1. 
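The comment above describes the scale-down strategy: never shrink by more than 2x in one blit, and chain intermediate targets sized readbackSize * (1 << level) until the window fits. A standalone sketch of the resulting size chain (widths only), using an example window width and a hypothetical readback width; the real size comes from ProfilerScreenshots::ScreenshotSize():

#include <cstdio>

int main() {
  const int windowWidth = 1920;      // example window width
  const int screenshotWidth = 350;   // hypothetical readback width

  // The largest intermediate target is the first power-of-two multiple of
  // the screenshot width that is at least half the window width; every blit
  // in the chain then shrinks by a factor of two or less.
  int levels = 0;
  while (windowWidth > (screenshotWidth << (levels + 1))) {
    levels++;
  }

  printf("%d", windowWidth);
  for (int i = levels; i >= 0; i--) {
    printf(" -> %d", screenshotWidth << i);
  }
  printf("\n");  // e.g. 1920 -> 1400 -> 700 -> 350
  return 0;
}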
+RefPtr<MLGTexture> MLGPUScreenshotGrabberImpl::ScaleDownWindowTargetToSize( + MLGDevice* aDevice, const IntSize& aDestSize, MLGTexture* aWindowTexture, + size_t aLevel) { + aDevice->SetScissorRect(Nothing()); + aDevice->SetDepthTestMode(MLGDepthTestMode::Disabled); + aDevice->SetTopology(MLGPrimitiveTopology::UnitQuad); + // DiagnosticText happens to be the simplest shader we have to draw a quad. + aDevice->SetVertexShader(VertexShaderID::DiagnosticText); + aDevice->SetPixelShader(PixelShaderID::DiagnosticText); + aDevice->SetBlendState(MLGBlendState::Copy); + aDevice->SetSamplerMode(0, SamplerMode::LinearClamp); + + if (aLevel == mCachedLevels.Length()) { + RefPtr<MLGRenderTarget> rt = + aDevice->CreateRenderTarget(mReadbackTextureSize * (1 << aLevel)); + mCachedLevels.AppendElement(CachedLevel{rt, nullptr, nullptr}); + } + MOZ_RELEASE_ASSERT(aLevel < mCachedLevels.Length()); + + RefPtr<MLGTexture> sourceTarget = aWindowTexture; + IntSize sourceSize = aWindowTexture->GetSize(); + if (aWindowTexture->GetSize().width > aDestSize.width * 2) { + sourceSize = aDestSize * 2; + sourceTarget = ScaleDownWindowTargetToSize(aDevice, sourceSize, + aWindowTexture, aLevel + 1); + } + + if (sourceTarget) { + if (BlitTexture(aDevice, mCachedLevels[aLevel], sourceTarget, sourceSize, + aDestSize)) { + return mCachedLevels[aLevel].mRenderTarget->GetTexture(); + } + } + return nullptr; +} + +bool MLGPUScreenshotGrabberImpl::BlitTexture(MLGDevice* aDevice, + CachedLevel& aLevel, + MLGTexture* aSource, + const IntSize& aSourceSize, + const IntSize& aDestSize) { + MOZ_ASSERT(aLevel.mRenderTarget); + MLGRenderTarget* rt = aLevel.mRenderTarget; + MOZ_ASSERT(aDestSize <= rt->GetSize()); + + struct TextureRect { + Rect bounds; + Rect texCoords; + }; + + if (!aLevel.mVertexBuffer) { + TextureRect rect; + rect.bounds = Rect(Point(), Size(aDestSize)); + rect.texCoords = + Rect(0.0, 0.0, Float(aSourceSize.width) / aSource->GetSize().width, + Float(aSourceSize.height) / aSource->GetSize().height); + + VertexStagingBuffer instances; + if (!instances.AppendItem(rect)) { + return false; + } + + RefPtr<MLGBuffer> vertices = aDevice->CreateBuffer( + MLGBufferType::Vertex, instances.NumItems() * instances.SizeOfItem(), + MLGUsage::Immutable, instances.GetBufferStart()); + if (!vertices) { + return false; + } + + aLevel.mVertexBuffer = vertices; + } + + if (!aLevel.mWorldConstants) { + WorldConstants vsConstants; + Matrix4x4 projection = Matrix4x4::Translation(-1.0, 1.0, 0.0); + projection.PreScale(2.0 / float(rt->GetSize().width), + 2.0 / float(rt->GetSize().height), 1.0f); + projection.PreScale(1.0f, -1.0f, 1.0f); + + memcpy(vsConstants.projection, &projection._11, 64); + vsConstants.targetOffset = Point(); + vsConstants.sortIndexOffset = 0; + vsConstants.debugFrameNumber = 0; + + aLevel.mWorldConstants = + aDevice->CreateBuffer(MLGBufferType::Constant, sizeof(vsConstants), + MLGUsage::Immutable, &vsConstants); + + if (!aLevel.mWorldConstants) { + return false; + } + } + + aDevice->SetRenderTarget(rt); + aDevice->SetPSTexture(0, aSource); + aDevice->SetViewport(IntRect(IntPoint(0, 0), rt->GetSize())); + aDevice->SetVertexBuffer(1, aLevel.mVertexBuffer, sizeof(TextureRect)); + aDevice->SetVSConstantBuffer(kWorldConstantBufferSlot, + aLevel.mWorldConstants); + aDevice->DrawInstanced(4, 1, 0, 0); + return true; +} + +void MLGPUScreenshotGrabberImpl::GrabScreenshot(MLGDevice* aDevice, + MLGTexture* aTexture) { + Size windowSize(aTexture->GetSize()); + float scale = std::min(mReadbackTextureSize.width / windowSize.width, + 
mReadbackTextureSize.height / windowSize.height); + IntSize scaledSize = IntSize::Round(windowSize * scale); + + // The initial target is non-GPU readable. This copy could probably be + // avoided if we had created the swap chain differently. However we + // don't know if that may inadvertently affect performance in the + // non-profiling case. + RefPtr<MLGTexture> windowTexture = aDevice->CreateTexture( + aTexture->GetSize(), SurfaceFormat::B8G8R8A8, MLGUsage::Default, + MLGTextureFlags::ShaderResource); + aDevice->CopyTexture(windowTexture, IntPoint(), aTexture, + IntRect(IntPoint(), aTexture->GetSize())); + + RefPtr<MLGTexture> scaledTarget = + ScaleDownWindowTargetToSize(aDevice, scaledSize, windowTexture, 0); + + if (!scaledTarget) { + PROFILER_MARKER_UNTYPED( + "NoCompositorScreenshot because ScaleDownWindowTargetToSize failed", + GRAPHICS); + return; + } + + RefPtr<MLGTexture> readbackTexture = TakeNextReadbackTexture(aDevice); + if (!readbackTexture) { + PROFILER_MARKER_UNTYPED( + "NoCompositorScreenshot because AsyncReadbackReadbackTexture creation " + "failed", + GRAPHICS); + return; + } + + aDevice->CopyTexture(readbackTexture, IntPoint(), scaledTarget, + IntRect(IntPoint(), mReadbackTextureSize)); + + // This QueueItem will be added to the queue at the end of the next call to + // ProcessQueue(). This ensures that the ReadbackTexture isn't mapped into + // main memory until the next frame. If we did it in this frame, we'd block on + // the GPU. + mCurrentFrameQueueItem = + Some(QueueItem{TimeStamp::Now(), std::move(readbackTexture), scaledSize, + aTexture->GetSize(), aDevice, + reinterpret_cast<uintptr_t>(static_cast<void*>(this))}); +} + +already_AddRefed<MLGTexture> +MLGPUScreenshotGrabberImpl::TakeNextReadbackTexture(MLGDevice* aDevice) { + if (!mAvailableReadbackTextures.IsEmpty()) { + RefPtr<MLGTexture> readbackTexture = mAvailableReadbackTextures[0]; + mAvailableReadbackTextures.RemoveElementAt(0); + return readbackTexture.forget(); + } + return aDevice + ->CreateTexture(mReadbackTextureSize, SurfaceFormat::B8G8R8A8, + MLGUsage::Staging, MLGTextureFlags::None) + .forget(); +} + +void MLGPUScreenshotGrabberImpl::ReturnReadbackTexture( + MLGTexture* aReadbackTexture) { + mAvailableReadbackTextures.AppendElement(aReadbackTexture); +} + +void MLGPUScreenshotGrabberImpl::ProcessQueue() { + if (!mQueue.IsEmpty()) { + if (!mProfilerScreenshots) { + mProfilerScreenshots = new ProfilerScreenshots(); + } + for (const auto& item : mQueue) { + mProfilerScreenshots->SubmitScreenshot( + item.mWindowIdentifier, item.mWindowSize, item.mScreenshotSize, + item.mTimeStamp, [&item](DataSourceSurface* aTargetSurface) { + MLGMappedResource map; + if (!item.mDevice->Map(item.mScreenshotReadbackTexture, + MLGMapType::READ, &map)) { + return false; + } + DataSourceSurface::ScopedMap destMap(aTargetSurface, + DataSourceSurface::WRITE); + bool result = + SwizzleData(map.mData, map.mStride, SurfaceFormat::B8G8R8A8, + destMap.GetData(), destMap.GetStride(), + aTargetSurface->GetFormat(), item.mScreenshotSize); + + item.mDevice->Unmap(item.mScreenshotReadbackTexture); + return result; + }); + ReturnReadbackTexture(item.mScreenshotReadbackTexture); + } + } + mQueue.Clear(); + + if (mCurrentFrameQueueItem) { + mQueue.AppendElement(std::move(*mCurrentFrameQueueItem)); + mCurrentFrameQueueItem = Nothing(); + } +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/MLGPUScreenshotGrabber.h b/gfx/layers/mlgpu/MLGPUScreenshotGrabber.h new file mode 100644 index 
0000000000..1be5c0f0fb --- /dev/null +++ b/gfx/layers/mlgpu/MLGPUScreenshotGrabber.h @@ -0,0 +1,59 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_layers_MLGPUScreenshotGrabber_h +#define mozilla_layers_MLGPUScreenshotGrabber_h + +#include "mozilla/UniquePtr.h" +#include "mozilla/layers/MLGDevice.h" + +namespace mozilla { +namespace layers { + +class MLGPUScreenshotGrabberImpl; + +/** + * Used by LayerManagerComposite to grab snapshots from the compositor and + * submit them to the Gecko profiler. + * Doesn't do any work if the profiler is not running or the "screenshots" + * feature is not enabled. + * Screenshots are scaled down to fit within a fixed size, and read back to + * main memory using async readback. Scaling is done in multiple scale-by-0.5x + * steps using CompositingRenderTargets and Compositor::BlitFromRenderTarget, + * and readback is done using AsyncReadbackBuffers. + */ +class MLGPUScreenshotGrabber final { + public: + MLGPUScreenshotGrabber(); + ~MLGPUScreenshotGrabber(); + + // Scale the contents of aTexture into an appropriately sized MLGTexture + // and read its contents into an AsyncReadbackBuffer. The AsyncReadbackBuffer + // is not mapped into main memory until the second call to + // MaybeProcessQueue() after this call to MaybeGrabScreenshot(). + void MaybeGrabScreenshot(MLGDevice* aDevice, MLGTexture* aTexture); + + // Map the contents of any outstanding AsyncReadbackBuffers from previous + // composites into main memory and submit each screenshot to the profiler. + void MaybeProcessQueue(); + + // Insert a special profiler marker for a composite that didn't do any actual + // compositing, so that the profiler knows why no screenshot was taken for + // this frame. + void NotifyEmptyFrame(); + + // Destroy all Compositor-related resources that this class is holding on to. + void Destroy(); + + private: + // non-null while ProfilerScreenshots::IsEnabled() returns true + UniquePtr<MLGPUScreenshotGrabberImpl> mImpl; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_layers_MLGPUScreenshotGrabber_h diff --git a/gfx/layers/mlgpu/MaskOperation.cpp b/gfx/layers/mlgpu/MaskOperation.cpp new file mode 100644 index 0000000000..9976ec4f62 --- /dev/null +++ b/gfx/layers/mlgpu/MaskOperation.cpp @@ -0,0 +1,173 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "MaskOperation.h" +#include "FrameBuilder.h" +#include "LayerMLGPU.h" +#include "mozilla/layers/LayersHelpers.h" +#include "MLGDevice.h" +#include "TexturedLayerMLGPU.h" + +namespace mozilla { +namespace layers { + +using namespace gfx; + +MaskOperation::MaskOperation(FrameBuilder* aBuilder) {} + +MaskOperation::MaskOperation(FrameBuilder* aBuilder, MLGTexture* aSource) + : mTexture(aSource) {} + +MaskOperation::~MaskOperation() = default; + +static gfx::Rect ComputeQuadForMaskLayer(Layer* aLayer, const IntSize& aSize) { + const Matrix4x4& transform = aLayer->GetEffectiveTransform(); + MOZ_ASSERT(transform.Is2D(), "Mask layers should not have 3d transforms"); + + Rect bounds(Point(0, 0), Size(aSize)); + return transform.As2D().TransformBounds(bounds); +} + +Rect MaskOperation::ComputeMaskRect(Layer* aLayer) const { + Layer* maskLayer = aLayer->GetMaskLayer() ? aLayer->GetMaskLayer() + : aLayer->GetAncestorMaskLayerAt(0); + MOZ_ASSERT( + (aLayer->GetAncestorMaskLayerCount() == 0 && aLayer->GetMaskLayer()) || + (aLayer->GetAncestorMaskLayerCount() == 1 && !aLayer->GetMaskLayer())); + + return ComputeQuadForMaskLayer(maskLayer, mTexture->GetSize()); +} + +// This is only needed for std::map. +bool MaskTexture::operator<(const MaskTexture& aOther) const { + if (mRect.X() != aOther.mRect.X()) { + return mRect.X() < aOther.mRect.X(); + } + if (mRect.Y() != aOther.mRect.Y()) { + return mRect.Y() < aOther.mRect.Y(); + } + if (mRect.Width() != aOther.mRect.Width()) { + return mRect.Width() < aOther.mRect.Width(); + } + if (mRect.Height() != aOther.mRect.Height()) { + return mRect.Height() < aOther.mRect.Height(); + } + return mSource < aOther.mSource; +} + +RefPtr<TextureSource> GetMaskLayerTexture(Layer* aLayer) { + LayerMLGPU* layer = aLayer->AsHostLayer()->AsLayerMLGPU(); + TexturedLayerMLGPU* texLayer = layer->AsTexturedLayerMLGPU(); + if (!texLayer) { + MOZ_ASSERT_UNREACHABLE("Mask layers should be texture layers"); + return nullptr; + } + + RefPtr<TextureSource> source = texLayer->BindAndGetTexture(); + if (!source) { + gfxWarning() << "Mask layer does not have a TextureSource"; + return nullptr; + } + return source; +} + +MaskCombineOperation::MaskCombineOperation(FrameBuilder* aBuilder) + : MaskOperation(aBuilder), mBuilder(aBuilder) {} + +MaskCombineOperation::~MaskCombineOperation() = default; + +void MaskCombineOperation::Init(const MaskTextureList& aTextures) { + // All masks for a single layer exist in the same coordinate space. Find the + // area that covers all rects. + Rect area = aTextures[0].mRect; + for (size_t i = 1; i < aTextures.size(); i++) { + area = area.Intersect(aTextures[i].mRect); + } + + // Go through and decide which areas of the textures are relevant. 
+ for (size_t i = 0; i < aTextures.size(); i++) { + Rect rect = aTextures[i].mRect.Intersect(area); + if (rect.IsEmpty()) { + continue; + } + + rect -= aTextures[i].mRect.TopLeft(); + mTextures.push_back(MaskTexture(rect, aTextures[i].mSource)); + } + + IntRect size; + Rect bounds = area; + bounds.RoundOut(); + bounds.ToIntRect(&size); + + if (size.IsEmpty()) { + return; + } + + mTarget = mBuilder->GetDevice()->CreateRenderTarget(size.Size()); + if (mTarget) { + mTexture = mTarget->GetTexture(); + } + mArea = area; +} + +void MaskCombineOperation::PrepareForRendering() { + for (const auto& entry : mTextures) { + Rect texCoords = TextureRectToCoords(entry.mRect, entry.mSource->GetSize()); + + SharedVertexBuffer* shared = mBuilder->GetDevice()->GetSharedVertexBuffer(); + + VertexBufferSection section; + if (!shared->Allocate(&section, 1, sizeof(texCoords), &texCoords)) { + continue; + } + mInputBuffers.push_back(section); + } +} + +void MaskCombineOperation::Render() { + if (!mTarget) { + return; + } + + RefPtr<MLGDevice> device = mBuilder->GetDevice(); + + device->SetTopology(MLGPrimitiveTopology::UnitQuad); + device->SetVertexShader(VertexShaderID::MaskCombiner); + + device->SetPixelShader(PixelShaderID::MaskCombiner); + device->SetSamplerMode(0, SamplerMode::LinearClamp); + device->SetBlendState(MLGBlendState::Min); + + // Since the mask operation is effectively an AND operation, we initialize + // the entire r-channel to 1. + device->Clear(mTarget, DeviceColor(1, 0, 0, 1)); + device->SetScissorRect(Nothing()); + device->SetRenderTarget(mTarget); + device->SetViewport(IntRect(IntPoint(0, 0), mTarget->GetSize())); + + for (size_t i = 0; i < mInputBuffers.size(); i++) { + if (!mInputBuffers[i].IsValid()) { + continue; + } + device->SetVertexBuffer(1, &mInputBuffers[i]); + device->SetPSTexture(0, mTextures[i].mSource); + device->DrawInstanced(4, mInputBuffers[i].NumVertices(), 0, 0); + } +} + +void AppendToMaskTextureList(MaskTextureList& aList, Layer* aLayer) { + RefPtr<TextureSource> source = GetMaskLayerTexture(aLayer); + if (!source) { + return; + } + + gfx::Rect rect = ComputeQuadForMaskLayer(aLayer, source->GetSize()); + aList.push_back(MaskTexture(rect, source)); +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/MaskOperation.h b/gfx/layers/mlgpu/MaskOperation.h new file mode 100644 index 0000000000..2cf74196ea --- /dev/null +++ b/gfx/layers/mlgpu/MaskOperation.h @@ -0,0 +1,89 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_MaskOperation_h +#define mozilla_gfx_layers_mlgpu_MaskOperation_h + +#include "mozilla/RefPtr.h" +#include "mozilla/gfx/Rect.h" +#include "SharedBufferMLGPU.h" +#include <vector> + +namespace mozilla { +namespace layers { + +class FrameBuilder; +class Layer; +class MLGDevice; +class MLGRenderTarget; +class MLGTexture; +class TextureSource; + +class MaskOperation { + NS_INLINE_DECL_REFCOUNTING(MaskOperation) + + public: + // For when the exact texture is known ahead of time. + MaskOperation(FrameBuilder* aBuilder, MLGTexture* aSource); + + // Return the mask rectangle in screen coordinates. This function takes a + // layer because a single-texture mask operation is not dependent on a + // specific mask transform.
(Multiple mask layer operations are, and they + // ignore the layer parameter). + virtual gfx::Rect ComputeMaskRect(Layer* aLayer) const; + + MLGTexture* GetTexture() const { return mTexture; } + bool IsEmpty() const { return !mTexture; } + + protected: + explicit MaskOperation(FrameBuilder* aBuilder); + virtual ~MaskOperation(); + + protected: + RefPtr<MLGTexture> mTexture; +}; + +struct MaskTexture { + MaskTexture() : mSource(nullptr) {} + MaskTexture(const gfx::Rect& aRect, TextureSource* aSource) + : mRect(aRect), mSource(aSource) {} + + bool operator<(const MaskTexture& aOther) const; + + gfx::Rect mRect; + RefPtr<TextureSource> mSource; +}; + +typedef std::vector<MaskTexture> MaskTextureList; + +class MaskCombineOperation final : public MaskOperation { + public: + explicit MaskCombineOperation(FrameBuilder* aBuilder); + virtual ~MaskCombineOperation(); + + void Init(const MaskTextureList& aTextures); + + void PrepareForRendering(); + void Render(); + + gfx::Rect ComputeMaskRect(Layer* aLayer) const override { return mArea; } + + private: + FrameBuilder* mBuilder; + gfx::Rect mArea; + MaskTextureList mTextures; + RefPtr<MLGRenderTarget> mTarget; + + std::vector<VertexBufferSection> mInputBuffers; +}; + +RefPtr<TextureSource> GetMaskLayerTexture(Layer* aLayer); +void AppendToMaskTextureList(MaskTextureList& aList, Layer* aLayer); + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_MaskOperation_h diff --git a/gfx/layers/mlgpu/MemoryReportingMLGPU.cpp b/gfx/layers/mlgpu/MemoryReportingMLGPU.cpp new file mode 100644 index 0000000000..efb66220b9 --- /dev/null +++ b/gfx/layers/mlgpu/MemoryReportingMLGPU.cpp @@ -0,0 +1,54 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "MemoryReportingMLGPU.h" +#include "nsIMemoryReporter.h" + +namespace mozilla { +namespace layers { +namespace mlg { + +mozilla::Atomic<size_t> sConstantBufferUsage; +mozilla::Atomic<size_t> sVertexBufferUsage; +mozilla::Atomic<size_t> sRenderTargetUsage; + +class MemoryReportingMLGPU final : public nsIMemoryReporter { + public: + NS_DECL_ISUPPORTS + + NS_IMETHOD CollectReports(nsIHandleReportCallback* aHandleReport, + nsISupports* aData, bool aAnonymize) override { + if (sConstantBufferUsage) { + MOZ_COLLECT_REPORT("mlgpu-constant-buffers", KIND_OTHER, UNITS_BYTES, + sConstantBufferUsage, + "Advanced Layers shader constant buffers."); + } + if (sVertexBufferUsage) { + MOZ_COLLECT_REPORT("mlgpu-vertex-buffers", KIND_OTHER, UNITS_BYTES, + sVertexBufferUsage, + "Advanced Layers shader vertex buffers."); + } + if (sRenderTargetUsage) { + MOZ_COLLECT_REPORT( + "mlgpu-render-targets", KIND_OTHER, UNITS_BYTES, sRenderTargetUsage, + "Advanced Layers render target textures and depth buffers."); + } + return NS_OK; + } + + private: + ~MemoryReportingMLGPU() = default; +}; + +NS_IMPL_ISUPPORTS(MemoryReportingMLGPU, nsIMemoryReporter); + +void InitializeMemoryReporters() { + RegisterStrongMemoryReporter(new MemoryReportingMLGPU()); +} + +} // namespace mlg +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/MemoryReportingMLGPU.h b/gfx/layers/mlgpu/MemoryReportingMLGPU.h new file mode 100644 index 0000000000..21ac0ea940 --- /dev/null +++ b/gfx/layers/mlgpu/MemoryReportingMLGPU.h @@ -0,0 +1,26 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_MemoryReportingMLGPU_h +#define mozilla_gfx_layers_mlgpu_MemoryReportingMLGPU_h + +#include "mozilla/Atomics.h" + +namespace mozilla { +namespace layers { +namespace mlg { + +void InitializeMemoryReporters(); + +extern mozilla::Atomic<size_t> sConstantBufferUsage; +extern mozilla::Atomic<size_t> sVertexBufferUsage; +extern mozilla::Atomic<size_t> sRenderTargetUsage; + +} // namespace mlg +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_MemoryReportingMLGPU_h diff --git a/gfx/layers/mlgpu/PaintedLayerMLGPU.cpp b/gfx/layers/mlgpu/PaintedLayerMLGPU.cpp new file mode 100644 index 0000000000..cdd7ac386d --- /dev/null +++ b/gfx/layers/mlgpu/PaintedLayerMLGPU.cpp @@ -0,0 +1,219 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "PaintedLayerMLGPU.h" +#include "LayerManagerMLGPU.h" +#include "mozilla/layers/LayersHelpers.h" +#include "mozilla/layers/TiledContentHost.h" +#include "UnitTransforms.h" + +namespace mozilla { + +using namespace gfx; + +namespace layers { + +PaintedLayerMLGPU::PaintedLayerMLGPU(LayerManagerMLGPU* aManager) + : PaintedLayer(aManager, static_cast<HostLayer*>(this)), + LayerMLGPU(aManager) { + MOZ_COUNT_CTOR(PaintedLayerMLGPU); +} + +PaintedLayerMLGPU::~PaintedLayerMLGPU() { + MOZ_COUNT_DTOR(PaintedLayerMLGPU); + + CleanupResources(); +} + +bool PaintedLayerMLGPU::OnPrepareToRender(FrameBuilder* aBuilder) { + // Reset our cached texture pointers. The next call to AssignToView will + // populate them again. + mTexture = nullptr; + mTextureOnWhite = nullptr; + return !!mHost; +} + +void PaintedLayerMLGPU::SetRenderRegion(LayerIntRegion&& aRegion) { + mRenderRegion = std::move(aRegion); + + LayerIntRect bounds(mRenderRegion.GetBounds().TopLeft(), + ViewAs<LayerPixel>(mTexture->GetSize())); + mRenderRegion.AndWith(bounds); +} + +const LayerIntRegion& PaintedLayerMLGPU::GetDrawRects() { +#ifndef MOZ_IGNORE_PAINT_WILL_RESAMPLE + // Note: we don't set PaintWillResample on our ContentTextureHost. The old + // compositor must do this since ContentHost is responsible for issuing + // draw calls, but in AL we can handle it directly here. + // + // Note that when AL performs CPU-based occlusion culling (the default + // behavior), we might break up the visible region again. If that turns + // out to be a problem, we can factor this into ForEachDrawRect instead. + if (MayResample()) { + mDrawRects = mRenderRegion.GetBounds(); + return mDrawRects; + } +#endif + return mRenderRegion; +} + +bool PaintedLayerMLGPU::SetCompositableHost(CompositableHost* aHost) { + switch (aHost->GetType()) { + case CompositableType::CONTENT_TILED: + case CompositableType::CONTENT_SINGLE: + case CompositableType::CONTENT_DOUBLE: { + if (mHost && mHost != aHost->AsContentHost()) { + mHost->Detach(this); + } + mHost = aHost->AsContentHost(); + if (!mHost) { + gfxWarning() << "ContentHostBase is not a ContentHostTexture"; + } + return true; + } + default: + return false; + } +} + +CompositableHost* PaintedLayerMLGPU::GetCompositableHost() { return mHost; } + +gfx::Point PaintedLayerMLGPU::GetDestOrigin() const { return mDestOrigin; } + +void PaintedLayerMLGPU::AssignToView(FrameBuilder* aBuilder, + RenderViewMLGPU* aView, + Maybe<Polygon>&& aGeometry) { + if (TiledContentHost* tiles = mHost->AsTiledContentHost()) { + // Note: we do not support the low-res buffer yet. + MOZ_ASSERT(tiles->GetLowResBuffer().GetTileCount() == 0); + AssignHighResTilesToView(aBuilder, aView, tiles, aGeometry); + return; + } + + // If we don't have a texture yet, acquire one from the ContentHost now. + if (!mTexture) { + ContentHostTexture* single = mHost->AsContentHostTexture(); + if (!single) { + return; + } + + mTexture = single->AcquireTextureSource(); + if (!mTexture) { + return; + } + mTextureOnWhite = single->AcquireTextureSourceOnWhite(); + mDestOrigin = single->GetOriginOffset(); + } + + // Fall through to the single texture case. 
+ LayerMLGPU::AssignToView(aBuilder, aView, std::move(aGeometry)); +} + +void PaintedLayerMLGPU::AssignHighResTilesToView( + FrameBuilder* aBuilder, RenderViewMLGPU* aView, TiledContentHost* aTileHost, + const Maybe<Polygon>& aGeometry) { + TiledLayerBufferComposite& tiles = aTileHost->GetHighResBuffer(); + + LayerIntRegion compositeRegion = ViewAs<LayerPixel>(tiles.GetValidRegion()); + compositeRegion.AndWith(GetShadowVisibleRegion()); + if (compositeRegion.IsEmpty()) { + return; + } + + AssignTileBufferToView(aBuilder, aView, tiles, compositeRegion, aGeometry); +} + +void PaintedLayerMLGPU::AssignTileBufferToView( + FrameBuilder* aBuilder, RenderViewMLGPU* aView, + TiledLayerBufferComposite& aTiles, const LayerIntRegion& aCompositeRegion, + const Maybe<Polygon>& aGeometry) { + float resolution = aTiles.GetResolution(); + + // Save these so they can be restored at the end. + float baseOpacity = mComputedOpacity; + LayerIntRegion visible = GetShadowVisibleRegion(); + + for (size_t i = 0; i < aTiles.GetTileCount(); i++) { + TileHost& tile = aTiles.GetTile(i); + if (tile.IsPlaceholderTile()) { + continue; + } + + TileCoordIntPoint coord = aTiles.GetPlacement().TileCoord(i); + // A sanity check that catches a lot of mistakes. + MOZ_ASSERT(coord.x == tile.mTileCoord.x && coord.y == tile.mTileCoord.y); + + IntPoint offset = aTiles.GetTileOffset(coord); + + // Use LayerIntRect here so we don't have to keep re-allocating the region + // to change the unit type. + LayerIntRect tileRect(ViewAs<LayerPixel>(offset), + ViewAs<LayerPixel>(aTiles.GetScaledTileSize())); + LayerIntRegion tileDrawRegion = tileRect; + tileDrawRegion.AndWith(aCompositeRegion); + if (tileDrawRegion.IsEmpty()) { + continue; + } + tileDrawRegion.ScaleRoundOut(resolution, resolution); + + // Update layer state for this tile - that includes the texture, visible + // region, and opacity. + mTexture = tile.AcquireTextureSource(); + if (!mTexture) { + continue; + } + + mTextureOnWhite = tile.AcquireTextureSourceOnWhite(); + + SetShadowVisibleRegion(tileDrawRegion); + mComputedOpacity = tile.GetFadeInOpacity(baseOpacity); + mDestOrigin = offset; + + // Yes, it's a bit weird that we're assigning the same layer to the same + // view multiple times. Note that each time, the texture, computed + // opacity, origin, and visible region are updated to match the current + // tile, and we restore these properties after we've finished processing + // all tiles. + Maybe<Polygon> geometry = aGeometry; + LayerMLGPU::AssignToView(aBuilder, aView, std::move(geometry)); + } + + // Restore the computed opacity and visible region. 
+ mComputedOpacity = baseOpacity; + SetShadowVisibleRegion(std::move(visible)); +} + +void PaintedLayerMLGPU::CleanupResources() { + if (mHost) { + mHost->Detach(this); + } + mTexture = nullptr; + mTextureOnWhite = nullptr; + mHost = nullptr; +} + +void PaintedLayerMLGPU::PrintInfo(std::stringstream& aStream, + const char* aPrefix) { + PaintedLayer::PrintInfo(aStream, aPrefix); + if (mHost && mHost->IsAttached()) { + aStream << "\n"; + nsAutoCString pfx(aPrefix); + pfx += " "; + mHost->PrintInfo(aStream, pfx.get()); + } +} + +void PaintedLayerMLGPU::Disconnect() { CleanupResources(); } + +bool PaintedLayerMLGPU::IsContentOpaque() { + return !!(GetContentFlags() & CONTENT_OPAQUE); +} + +void PaintedLayerMLGPU::CleanupCachedResources() { CleanupResources(); } + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/PaintedLayerMLGPU.h b/gfx/layers/mlgpu/PaintedLayerMLGPU.h new file mode 100644 index 0000000000..670ede556a --- /dev/null +++ b/gfx/layers/mlgpu/PaintedLayerMLGPU.h @@ -0,0 +1,100 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZILLA_GFX_PAINTEDLAYERMLGPU_H +#define MOZILLA_GFX_PAINTEDLAYERMLGPU_H + +#include "LayerManagerMLGPU.h" +#include "mozilla/layers/ContentHost.h" +#include "mozilla/layers/LayerMLGPU.h" +#include "MLGDeviceTypes.h" +#include "nsRegionFwd.h" +#include <functional> + +namespace mozilla { +namespace layers { + +class TiledLayerBufferComposite; + +class PaintedLayerMLGPU final : public PaintedLayer, public LayerMLGPU { + public: + explicit PaintedLayerMLGPU(LayerManagerMLGPU* aManager); + virtual ~PaintedLayerMLGPU(); + + // Layer + HostLayer* AsHostLayer() override { return this; } + PaintedLayerMLGPU* AsPaintedLayerMLGPU() override { return this; } + Layer* GetLayer() override { return this; } + bool SetCompositableHost(CompositableHost*) override; + CompositableHost* GetCompositableHost() override; + void Disconnect() override; + bool IsContentOpaque() override; + + // PaintedLayer + void InvalidateRegion(const nsIntRegion& aRegion) override { + MOZ_CRASH("PaintedLayerMLGPU can't fill invalidated regions"); + } + + bool HasComponentAlpha() const { return !!mTextureOnWhite; } + TextureSource* GetTexture() const { return mTexture; } + TextureSource* GetTextureOnWhite() const { + MOZ_ASSERT(HasComponentAlpha()); + return mTextureOnWhite; + } + gfx::Point GetDestOrigin() const; + + SamplerMode GetSamplerMode() { + // Note that when resamping, we must break the texture coordinates into + // no-repeat rects. When we have simple integer translations we can + // simply wrap around the edge of the buffer texture. + return MayResample() ? SamplerMode::LinearClamp : SamplerMode::LinearRepeat; + } + + void SetRenderRegion(LayerIntRegion&& aRegion) override; + + // To avoid sampling issues with complex regions and transforms, we + // squash the visible region for PaintedLayers into a single draw + // rect. RenderPasses should use this method instead of GetRenderRegion. 
+ const LayerIntRegion& GetDrawRects(); + + MOZ_LAYER_DECL_NAME("PaintedLayerMLGPU", TYPE_PAINTED) + + void CleanupCachedResources(); + + protected: + void PrintInfo(std::stringstream& aStream, const char* aPrefix) override; + bool OnPrepareToRender(FrameBuilder* aBuilder) override; + + // We override this to support tiling. + void AssignToView(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + Maybe<gfx::Polygon>&& aGeometry) override; + + void AssignHighResTilesToView(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + TiledContentHost* aTileHost, + const Maybe<gfx::Polygon>& aGeometry); + + // Helper for Assign*TilesToView. + void AssignTileBufferToView(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + TiledLayerBufferComposite& aTiles, + const LayerIntRegion& aCompositeRegion, + const Maybe<gfx::Polygon>& aGeometry); + + void CleanupResources(); + + private: + RefPtr<ContentHost> mHost; + RefPtr<TextureSource> mTexture; + RefPtr<TextureSource> mTextureOnWhite; +#ifndef MOZ_IGNORE_PAINT_WILL_RESAMPLE + LayerIntRegion mDrawRects; +#endif + gfx::IntPoint mDestOrigin; +}; + +} // namespace layers +} // namespace mozilla + +#endif diff --git a/gfx/layers/mlgpu/RenderPassMLGPU-inl.h b/gfx/layers/mlgpu/RenderPassMLGPU-inl.h new file mode 100644 index 0000000000..6e497a8595 --- /dev/null +++ b/gfx/layers/mlgpu/RenderPassMLGPU-inl.h @@ -0,0 +1,67 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_RenderPassMLGPU_inl_h +#define mozilla_gfx_layers_mlgpu_RenderPassMLGPU_inl_h + +namespace mozilla { +namespace layers { + +template <typename Traits> +static inline bool AddShaderTriangles(VertexStagingBuffer* aBuffer, + const Traits& aTraits, + const gfx::Polygon* aGeometry = nullptr) { + typedef typename Traits::TriangleVertices TriangleVertices; + typedef typename Traits::FirstTriangle FirstTriangle; + typedef typename Traits::SecondTriangle SecondTriangle; + + if (!aGeometry) { + TriangleVertices base1 = aTraits.MakeVertex(FirstTriangle()); + TriangleVertices base2 = aTraits.MakeVertex(SecondTriangle()); + auto data1 = aTraits.MakeVertexData(FirstTriangle()); + auto data2 = aTraits.MakeVertexData(SecondTriangle()); + return aBuffer->PrependItem(base1, data1) && + aBuffer->PrependItem(base2, data2); + } + + auto triangles = aTraits.GenerateTriangles(*aGeometry); + for (const auto& triangle : triangles) { + TriangleVertices base = aTraits.MakeVertex(triangle); + auto data = aTraits.MakeVertexData(triangle); + if (!aBuffer->PrependItem(base, data)) { + return false; + } + } + return true; +} + +template <typename Traits> +inline bool BatchRenderPass<Traits>::Txn::AddImpl(const Traits& aTraits) { + VertexStagingBuffer* instances = mPass->GetInstances(); + + if (mPass->mGeometry == GeometryMode::Polygon) { + if (const Maybe<gfx::Polygon>& geometry = aTraits.geometry()) { + gfx::Polygon polygon = geometry->ClipPolygon(aTraits.rect()); + if (polygon.IsEmpty()) { + return true; + } + return AddShaderTriangles(instances, aTraits, &polygon); + } + return AddShaderTriangles(instances, aTraits); + } + + typedef typename Traits::UnitQuadVertex UnitQuadVertex; + typedef typename Traits::UnitQuad UnitQuad; + + UnitQuadVertex base = aTraits.MakeUnitQuadVertex(); + auto data = 
aTraits.MakeVertexData(UnitQuad()); + return instances->AddItem(base, data); +} + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_RenderPassMLGPU_inl_h diff --git a/gfx/layers/mlgpu/RenderPassMLGPU.cpp b/gfx/layers/mlgpu/RenderPassMLGPU.cpp new file mode 100644 index 0000000000..1fd970e4a8 --- /dev/null +++ b/gfx/layers/mlgpu/RenderPassMLGPU.cpp @@ -0,0 +1,971 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "RenderPassMLGPU.h" +#include "ContainerLayerMLGPU.h" +#include "FrameBuilder.h" +#include "ImageLayerMLGPU.h" +#include "MaskOperation.h" +#include "MLGDevice.h" +#include "PaintedLayerMLGPU.h" +#include "RenderViewMLGPU.h" +#include "ShaderDefinitionsMLGPU.h" +#include "ShaderDefinitionsMLGPU-inl.h" +#include "SharedBufferMLGPU.h" +#include "mozilla/layers/LayersHelpers.h" +#include "mozilla/layers/LayersMessages.h" +#include "RenderPassMLGPU-inl.h" + +namespace mozilla { +namespace layers { + +using namespace gfx; + +ItemInfo::ItemInfo(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + LayerMLGPU* aLayer, int32_t aSortOrder, + const IntRect& aBounds, Maybe<Polygon>&& aGeometry) + : view(aView), + layer(aLayer), + type(RenderPassType::Unknown), + layerIndex(kInvalidResourceIndex), + sortOrder(aSortOrder), + bounds(aBounds), + geometry(std::move(aGeometry)) { + const Matrix4x4& transform = aLayer->GetLayer()->GetEffectiveTransform(); + + Matrix transform2D; + if (!geometry && transform.Is2D(&transform2D) && + transform2D.IsRectilinear()) { + this->rectilinear = true; + if (transform2D.IsIntegerTranslation()) { + this->translation = + Some(IntPoint::Truncate(transform2D.GetTranslation())); + } + } else { + this->rectilinear = false; + } + + // Layers can have arbitrary clips or transforms, and we can't use built-in + // scissor functionality when batching. Instead, pixel shaders will write + // transparent pixels for positions outside of the clip. Unfortunately that + // breaks z-buffering because the transparent pixels will still write to + // the depth buffer. + // + // To make this work, we clamp the final vertices in the vertex shader to + // the clip rect. We can only do this for rectilinear transforms. If a + // transform can produce a rotation or perspective change, then we might + // accidentally change the geometry. These items are not treated as + // opaque. + // + // Also, we someday want non-rectilinear items to be antialiased with DEAA, + // and we can't do this if the items are rendered front-to-back, since + // such items cannot be blended. (Though we could consider adding these + // items in two separate draw calls, one for DEAA and for not - that is + // definitely future work.) + if (aLayer->GetComputedOpacity() != 1.0f || aLayer->GetMask() || + !aLayer->IsContentOpaque() || !rectilinear) { + this->opaque = false; + this->renderOrder = RenderOrder::BackToFront; + } else { + this->opaque = true; + this->renderOrder = aView->HasDepthBuffer() ? 
RenderOrder::FrontToBack + : RenderOrder::BackToFront; + } + + this->type = RenderPassMLGPU::GetPreferredPassType(aBuilder, *this); +} + +RenderPassType RenderPassMLGPU::GetPreferredPassType(FrameBuilder* aBuilder, + const ItemInfo& aItem) { + LayerMLGPU* layer = aItem.layer; + switch (layer->GetType()) { + case Layer::TYPE_COLOR: { + if (aBuilder->GetDevice()->CanUseClearView() && + aItem.HasRectTransformAndClip() && aItem.translation && + aItem.opaque && !aItem.view->HasDepthBuffer()) { + // Note: we don't have ClearView set up to do depth buffer writes, so we + // exclude depth buffering from the test above. + return RenderPassType::ClearView; + } + return RenderPassType::SolidColor; + } + case Layer::TYPE_PAINTED: { + PaintedLayerMLGPU* painted = layer->AsPaintedLayerMLGPU(); + if (painted->HasComponentAlpha()) { + return RenderPassType::ComponentAlpha; + } + return RenderPassType::SingleTexture; + } + case Layer::TYPE_CANVAS: + return RenderPassType::SingleTexture; + case Layer::TYPE_IMAGE: { + ImageHost* host = layer->AsTexturedLayerMLGPU()->GetImageHost(); + TextureHost* texture = host->CurrentTextureHost(); + if (texture->GetReadFormat() == SurfaceFormat::YUV || + texture->GetReadFormat() == SurfaceFormat::NV12 || + texture->GetReadFormat() == SurfaceFormat::P010 || + texture->GetReadFormat() == SurfaceFormat::P016) { + return RenderPassType::Video; + } + return RenderPassType::SingleTexture; + } + case Layer::TYPE_CONTAINER: + return RenderPassType::RenderView; + default: + return RenderPassType::Unknown; + } +} + +RefPtr<RenderPassMLGPU> RenderPassMLGPU::CreatePass(FrameBuilder* aBuilder, + const ItemInfo& aItem) { + switch (aItem.type) { + case RenderPassType::SolidColor: + return MakeAndAddRef<SolidColorPass>(aBuilder, aItem); + case RenderPassType::SingleTexture: + return MakeAndAddRef<SingleTexturePass>(aBuilder, aItem); + case RenderPassType::RenderView: + return MakeAndAddRef<RenderViewPass>(aBuilder, aItem); + case RenderPassType::Video: + return MakeAndAddRef<VideoRenderPass>(aBuilder, aItem); + case RenderPassType::ComponentAlpha: + return MakeAndAddRef<ComponentAlphaPass>(aBuilder, aItem); + case RenderPassType::ClearView: + return MakeAndAddRef<ClearViewPass>(aBuilder, aItem); + default: + return nullptr; + } +} + +RenderPassMLGPU::RenderPassMLGPU(FrameBuilder* aBuilder, const ItemInfo& aItem) + : mBuilder(aBuilder), + mDevice(aBuilder->GetDevice()), + mLayerBufferIndex(aBuilder->CurrentLayerBufferIndex()), + mMaskRectBufferIndex(kInvalidResourceIndex), + mPrepared(false) {} + +RenderPassMLGPU::~RenderPassMLGPU() = default; + +bool RenderPassMLGPU::IsCompatible(const ItemInfo& aItem) { + if (GetType() != aItem.type) { + return false; + } + if (mLayerBufferIndex != mBuilder->CurrentLayerBufferIndex()) { + return false; + } + return true; +} + +bool RenderPassMLGPU::AcceptItem(ItemInfo& aInfo) { + MOZ_ASSERT(IsCompatible(aInfo)); + + if (!AddToPass(aInfo.layer, aInfo)) { + return false; + } + + if (aInfo.renderOrder == RenderOrder::BackToFront) { + mAffectedRegion.OrWith(aInfo.bounds); + mAffectedRegion.SimplifyOutward(4); + } + return true; +} + +bool RenderPassMLGPU::Intersects(const ItemInfo& aItem) { + MOZ_ASSERT(aItem.renderOrder == RenderOrder::BackToFront); + return !mAffectedRegion.Intersect(aItem.bounds).IsEmpty(); +} + +void RenderPassMLGPU::PrepareForRendering() { mPrepared = true; } + +ShaderRenderPass::ShaderRenderPass(FrameBuilder* aBuilder, + const ItemInfo& aItem) + : RenderPassMLGPU(aBuilder, aItem), + mGeometry(GeometryMode::Unknown), + 
mHasRectTransformAndClip(aItem.HasRectTransformAndClip()) { + mMask = aItem.layer->GetMask(); + if (mMask) { + mMaskRectBufferIndex = mBuilder->CurrentMaskRectBufferIndex(); + } +} + +bool ShaderRenderPass::IsCompatible(const ItemInfo& aItem) { + MOZ_ASSERT(mGeometry != GeometryMode::Unknown); + + if (!RenderPassMLGPU::IsCompatible(aItem)) { + return false; + } + + // A masked batch cannot accept non-masked items, since the pixel shader + // bakes in whether a mask is present. Also, the pixel shader can only bind + // one specific mask at a time. + if (aItem.layer->GetMask() != mMask) { + return false; + } + if (mMask && mBuilder->CurrentMaskRectBufferIndex() != mMaskRectBufferIndex) { + return false; + } + + // We key batches on this property, since we can use more efficient pixel + // shaders if we don't need to propagate a clip and a mask. + if (mHasRectTransformAndClip != aItem.HasRectTransformAndClip()) { + return false; + } + + // We should be assured at this point, that if the item requires complex + // geometry, then it should have already been rejected from a unit-quad + // batch. Therefore this batch should be in polygon mode. + MOZ_ASSERT_IF(aItem.geometry.isSome(), mGeometry == GeometryMode::Polygon); + return true; +} + +void ShaderRenderPass::SetGeometry(const ItemInfo& aItem, GeometryMode aMode) { + MOZ_ASSERT(mGeometry == GeometryMode::Unknown); + + if (aMode == GeometryMode::Unknown) { + mGeometry = mHasRectTransformAndClip ? GeometryMode::UnitQuad + : GeometryMode::Polygon; + } else { + mGeometry = aMode; + } + + // Since we process layers front-to-back, back-to-front items are + // in the wrong order. We address this by automatically reversing + // the buffers we use to build vertices. + if (aItem.renderOrder != RenderOrder::FrontToBack) { + mInstances.SetReversed(); + } +} + +void ShaderRenderPass::PrepareForRendering() { + if (mInstances.IsEmpty()) { + return; + } + if (!mDevice->GetSharedVertexBuffer()->Allocate(&mInstanceBuffer, + mInstances) || + !SetupPSBuffer0(GetOpacity()) || !OnPrepareBuffers()) { + return; + } + return RenderPassMLGPU::PrepareForRendering(); +} + +bool ShaderRenderPass::SetupPSBuffer0(float aOpacity) { + if (aOpacity == 1.0f && !HasMask()) { + mPSBuffer0 = mBuilder->GetDefaultMaskInfo(); + return true; + } + + MaskInformation cb(aOpacity, HasMask()); + return mDevice->GetSharedPSBuffer()->Allocate(&mPSBuffer0, cb); +} + +void ShaderRenderPass::ExecuteRendering() { + if (mInstances.IsEmpty()) { + return; + } + + // Change the blend state if needed. 
+ if (Maybe<MLGBlendState> blendState = GetBlendState()) { + mDevice->SetBlendState(blendState.value()); + } + + mDevice->SetPSConstantBuffer(0, &mPSBuffer0); + if (MaskOperation* mask = GetMask()) { + mDevice->SetPSTexture(kMaskLayerTextureSlot, mask->GetTexture()); + mDevice->SetSamplerMode(kMaskSamplerSlot, SamplerMode::LinearClampToZero); + } + + SetupPipeline(); + + if (mGeometry == GeometryMode::Polygon) { + mDevice->SetTopology(MLGPrimitiveTopology::UnitTriangle); + } else { + mDevice->SetTopology(MLGPrimitiveTopology::UnitQuad); + } + mDevice->SetVertexBuffer(1, &mInstanceBuffer); + + if (mGeometry == GeometryMode::Polygon) { + mDevice->DrawInstanced(3, mInstanceBuffer.NumVertices(), 0, 0); + } else { + mDevice->DrawInstanced(4, mInstanceBuffer.NumVertices(), 0, 0); + } +} + +static inline DeviceColor ComputeLayerColor(LayerMLGPU* aLayer, + const DeviceColor& aColor) { + float opacity = aLayer->GetComputedOpacity(); + return DeviceColor(aColor.r * aColor.a * opacity, + aColor.g * aColor.a * opacity, + aColor.b * aColor.a * opacity, aColor.a * opacity); +} + +ClearViewPass::ClearViewPass(FrameBuilder* aBuilder, const ItemInfo& aItem) + : RenderPassMLGPU(aBuilder, aItem), mView(aItem.view) { + // Note: we could write to the depth buffer, but since the depth buffer is + // disabled by default, we don't bother yet. + MOZ_ASSERT(!mView->HasDepthBuffer()); + + ColorLayer* colorLayer = aItem.layer->GetLayer()->AsColorLayer(); + mColor = ComputeLayerColor(aItem.layer, colorLayer->GetColor()); +} + +bool ClearViewPass::IsCompatible(const ItemInfo& aItem) { + if (!RenderPassMLGPU::IsCompatible(aItem)) { + return false; + } + + // These should be true if we computed a ClearView pass type. + MOZ_ASSERT(aItem.translation); + MOZ_ASSERT(aItem.opaque); + MOZ_ASSERT(aItem.HasRectTransformAndClip()); + + // Each call only supports a single color. + ColorLayer* colorLayer = aItem.layer->GetLayer()->AsColorLayer(); + if (mColor != ComputeLayerColor(aItem.layer, colorLayer->GetColor())) { + return false; + } + + // We don't support opacity here since it would not blend correctly. 
+ MOZ_ASSERT(mColor.a == 1.0f); + return true; +} + +bool ClearViewPass::AddToPass(LayerMLGPU* aItem, ItemInfo& aInfo) { + const LayerIntRegion& region = aItem->GetRenderRegion(); + for (auto iter = region.RectIter(); !iter.Done(); iter.Next()) { + IntRect rect = iter.Get().ToUnknownRect(); + rect += aInfo.translation.value(); + rect -= mView->GetTargetOffset(); + mRects.AppendElement(rect); + } + return true; +} + +void ClearViewPass::ExecuteRendering() { + mDevice->ClearView(mDevice->GetRenderTarget(), mColor, mRects.Elements(), + mRects.Length()); +} + +SolidColorPass::SolidColorPass(FrameBuilder* aBuilder, const ItemInfo& aItem) + : BatchRenderPass(aBuilder, aItem) { + SetDefaultGeometry(aItem); +} + +bool SolidColorPass::AddToPass(LayerMLGPU* aLayer, ItemInfo& aInfo) { + MOZ_ASSERT(aLayer->GetType() == Layer::TYPE_COLOR); + + ColorLayer* colorLayer = aLayer->GetLayer()->AsColorLayer(); + + Txn txn(this); + + gfx::DeviceColor color = ComputeLayerColor(aLayer, colorLayer->GetColor()); + + const LayerIntRegion& region = aLayer->GetRenderRegion(); + for (auto iter = region.RectIter(); !iter.Done(); iter.Next()) { + const IntRect rect = iter.Get().ToUnknownRect(); + ColorTraits traits(aInfo, Rect(rect), color); + + if (!txn.Add(traits)) { + return false; + } + } + return txn.Commit(); +} + +float SolidColorPass::GetOpacity() const { + // Note our pixel shader just ignores the opacity, since we baked it + // into our color values already. Just return 1, which ensures we can + // use the default constant buffer binding. + return 1.0f; +} + +void SolidColorPass::SetupPipeline() { + if (mGeometry == GeometryMode::UnitQuad) { + mDevice->SetVertexShader(VertexShaderID::ColoredQuad); + mDevice->SetPixelShader(PixelShaderID::ColoredQuad); + } else { + mDevice->SetVertexShader(VertexShaderID::ColoredVertex); + mDevice->SetPixelShader(PixelShaderID::ColoredVertex); + } +} + +TexturedRenderPass::TexturedRenderPass(FrameBuilder* aBuilder, + const ItemInfo& aItem) + : BatchRenderPass(aBuilder, aItem), mTextureFlags(TextureFlags::NO_FLAGS) {} + +TexturedRenderPass::Info::Info(const ItemInfo& aItem, PaintedLayerMLGPU* aLayer) + : item(aItem), + textureSize(aLayer->GetTexture()->GetSize()), + destOrigin(aLayer->GetDestOrigin()), + decomposeIntoNoRepeatRects(aLayer->MayResample()) {} + +TexturedRenderPass::Info::Info(const ItemInfo& aItem, + TexturedLayerMLGPU* aLayer) + : item(aItem), + textureSize(aLayer->GetTexture()->GetSize()), + scale(aLayer->GetPictureScale()), + decomposeIntoNoRepeatRects(false) {} + +TexturedRenderPass::Info::Info(const ItemInfo& aItem, + ContainerLayerMLGPU* aLayer) + : item(aItem), + textureSize(aLayer->GetTargetSize()), + destOrigin(aLayer->GetTargetOffset()), + decomposeIntoNoRepeatRects(false) {} + +bool TexturedRenderPass::AddItem(Txn& aTxn, const Info& aInfo, + const Rect& aDrawRect) { + if (mGeometry == GeometryMode::Polygon) { + // This path will not clamp the draw rect to the layer clip, so we can pass + // the draw rect texture rects straight through. + return AddClippedItem(aTxn, aInfo, aDrawRect); + } + + const ItemInfo& item = aInfo.item; + + MOZ_ASSERT(!item.geometry); + MOZ_ASSERT(item.HasRectTransformAndClip()); + MOZ_ASSERT(mHasRectTransformAndClip); + + const Matrix4x4& fullTransform = + item.layer->GetLayer()->GetEffectiveTransformForBuffer(); + Matrix transform = fullTransform.As2D(); + Matrix inverse = transform; + if (!inverse.Invert()) { + // Degenerate transforms are not visible, since there is no mapping to + // screen space. 
Just return without adding any draws. + return true; + } + MOZ_ASSERT(inverse.IsRectilinear()); + + // Transform the clip rect. + IntRect clipRect = item.layer->GetComputedClipRect().ToUnknownRect(); + clipRect += item.view->GetTargetOffset(); + + // Clip and adjust the texture rect. + Rect localClip = inverse.TransformBounds(Rect(clipRect)); + Rect clippedDrawRect = aDrawRect.Intersect(localClip); + if (clippedDrawRect.IsEmpty()) { + return true; + } + + return AddClippedItem(aTxn, aInfo, clippedDrawRect); +} + +bool TexturedRenderPass::AddClippedItem(Txn& aTxn, const Info& aInfo, + const gfx::Rect& aDrawRect) { + float xScale = 1.0; + float yScale = 1.0; + if (aInfo.scale) { + xScale = aInfo.scale->width; + yScale = aInfo.scale->height; + } + + Point offset = aDrawRect.TopLeft() - aInfo.destOrigin; + Rect textureRect(offset.x * xScale, offset.y * yScale, + aDrawRect.Width() * xScale, aDrawRect.Height() * yScale); + + Rect textureCoords = TextureRectToCoords(textureRect, aInfo.textureSize); + if (mTextureFlags & TextureFlags::ORIGIN_BOTTOM_LEFT) { + textureCoords.MoveToY(1.0 - textureCoords.Y()); + textureCoords.SetHeight(-textureCoords.Height()); + } + + if (!aInfo.decomposeIntoNoRepeatRects) { + // Fast, normal case, we can use the texture coordinates as-is and the caller + // will use a repeat sampler if needed. + TexturedTraits traits(aInfo.item, aDrawRect, textureCoords); + if (!aTxn.Add(traits)) { + return false; + } + } else { + Rect layerRects[4]; + Rect textureRects[4]; + size_t numRects = DecomposeIntoNoRepeatRects(aDrawRect, textureCoords, + &layerRects, &textureRects); + + for (size_t i = 0; i < numRects; i++) { + TexturedTraits traits(aInfo.item, layerRects[i], textureRects[i]); + if (!aTxn.Add(traits)) { + return false; + } + } + } + return true; +} + +SingleTexturePass::SingleTexturePass(FrameBuilder* aBuilder, + const ItemInfo& aItem) + : TexturedRenderPass(aBuilder, aItem), + mSamplerMode(SamplerMode::LinearClamp), + mOpacity(1.0f) { + SetDefaultGeometry(aItem); +} + +bool SingleTexturePass::AddToPass(LayerMLGPU* aLayer, ItemInfo& aItem) { + RefPtr<TextureSource> texture; + + SamplerMode sampler; + TextureFlags flags = TextureFlags::NO_FLAGS; + if (PaintedLayerMLGPU* paintedLayer = aLayer->AsPaintedLayerMLGPU()) { + if (paintedLayer->HasComponentAlpha()) { + return false; + } + texture = paintedLayer->GetTexture(); + sampler = paintedLayer->GetSamplerMode(); + } else if (TexturedLayerMLGPU* texLayer = aLayer->AsTexturedLayerMLGPU()) { + texture = texLayer->GetTexture(); + sampler = FilterToSamplerMode(texLayer->GetSamplingFilter()); + TextureHost* host = texLayer->GetImageHost()->CurrentTextureHost(); + flags = host->GetFlags(); + } else { + return false; + } + + // We should not assign a texture-based layer to tiles if it has no texture. + MOZ_ASSERT(texture); + + float opacity = aLayer->GetComputedOpacity(); + if (mTexture) { + if (texture != mTexture) { + return false; + } + if (mSamplerMode != sampler) { + return false; + } + if (mOpacity != opacity) { + return false; + } + // Note: premultiplied, origin-bottom-left are already implied by the + // texture source. + } else { + mTexture = texture; + mSamplerMode = sampler; + mOpacity = opacity; + mTextureFlags = flags; + } + + Txn txn(this); + + // Note: these are two separate cases since no Info constructor takes in a + // base LayerMLGPU class.
+ if (PaintedLayerMLGPU* layer = aLayer->AsPaintedLayerMLGPU()) { + Info info(aItem, layer); + if (!AddItems(txn, info, layer->GetDrawRects())) { + return false; + } + } else if (TexturedLayerMLGPU* layer = aLayer->AsTexturedLayerMLGPU()) { + Info info(aItem, layer); + if (!AddItems(txn, info, layer->GetRenderRegion())) { + return false; + } + } + + return txn.Commit(); +} + +Maybe<MLGBlendState> SingleTexturePass::GetBlendState() const { + return (mTextureFlags & TextureFlags::NON_PREMULTIPLIED) + ? Some(MLGBlendState::OverAndPremultiply) + : Some(MLGBlendState::Over); +} + +void SingleTexturePass::SetupPipeline() { + MOZ_ASSERT(mTexture); + + if (mGeometry == GeometryMode::UnitQuad) { + mDevice->SetVertexShader(VertexShaderID::TexturedQuad); + } else { + mDevice->SetVertexShader(VertexShaderID::TexturedVertex); + } + + mDevice->SetPSTexture(0, mTexture); + mDevice->SetSamplerMode(kDefaultSamplerSlot, mSamplerMode); + switch (mTexture.get()->GetFormat()) { + case SurfaceFormat::B8G8R8A8: + case SurfaceFormat::R8G8B8A8: + if (mGeometry == GeometryMode::UnitQuad) + mDevice->SetPixelShader(PixelShaderID::TexturedQuadRGBA); + else + mDevice->SetPixelShader(PixelShaderID::TexturedVertexRGBA); + break; + default: + if (mGeometry == GeometryMode::UnitQuad) + mDevice->SetPixelShader(PixelShaderID::TexturedQuadRGB); + else + mDevice->SetPixelShader(PixelShaderID::TexturedVertexRGB); + break; + } +} + +ComponentAlphaPass::ComponentAlphaPass(FrameBuilder* aBuilder, + const ItemInfo& aItem) + : TexturedRenderPass(aBuilder, aItem), + mOpacity(1.0f), + mSamplerMode(SamplerMode::LinearClamp) { + SetDefaultGeometry(aItem); +} + +bool ComponentAlphaPass::AddToPass(LayerMLGPU* aLayer, ItemInfo& aItem) { + PaintedLayerMLGPU* layer = aLayer->AsPaintedLayerMLGPU(); + MOZ_ASSERT(layer); + + if (mTextureOnBlack) { + if (layer->GetTexture() != mTextureOnBlack || + layer->GetTextureOnWhite() != mTextureOnWhite || + layer->GetOpacity() != mOpacity || + layer->GetSamplerMode() != mSamplerMode) { + return false; + } + } else { + mOpacity = layer->GetComputedOpacity(); + mSamplerMode = layer->GetSamplerMode(); + mTextureOnBlack = layer->GetTexture(); + mTextureOnWhite = layer->GetTextureOnWhite(); + } + + Txn txn(this); + + Info info(aItem, layer); + if (!AddItems(txn, info, layer->GetDrawRects())) { + return false; + } + return txn.Commit(); +} + +float ComponentAlphaPass::GetOpacity() const { return mOpacity; } + +void ComponentAlphaPass::SetupPipeline() { + TextureSource* textures[2] = {mTextureOnBlack, mTextureOnWhite}; + MOZ_ASSERT(textures[0]); + MOZ_ASSERT(textures[1]); + + if (mGeometry == GeometryMode::UnitQuad) { + mDevice->SetVertexShader(VertexShaderID::TexturedQuad); + mDevice->SetPixelShader(PixelShaderID::ComponentAlphaQuad); + } else { + mDevice->SetVertexShader(VertexShaderID::TexturedVertex); + mDevice->SetPixelShader(PixelShaderID::ComponentAlphaVertex); + } + + mDevice->SetSamplerMode(kDefaultSamplerSlot, mSamplerMode); + mDevice->SetPSTextures(0, 2, textures); +} + +VideoRenderPass::VideoRenderPass(FrameBuilder* aBuilder, const ItemInfo& aItem) + : TexturedRenderPass(aBuilder, aItem), + mSamplerMode(SamplerMode::LinearClamp), + mOpacity(1.0f) { + SetDefaultGeometry(aItem); +} + +bool VideoRenderPass::AddToPass(LayerMLGPU* aLayer, ItemInfo& aItem) { + ImageLayerMLGPU* layer = aLayer->AsImageLayerMLGPU(); + if (!layer) { + return false; + } + + RefPtr<TextureHost> host = layer->GetImageHost()->CurrentTextureHost(); + RefPtr<TextureSource> source = layer->GetTexture(); + float opacity = 
layer->GetComputedOpacity(); + SamplerMode sampler = FilterToSamplerMode(layer->GetSamplingFilter()); + + if (mHost) { + if (mHost != host) { + return false; + } + if (mTexture != source) { + return false; + } + if (mOpacity != opacity) { + return false; + } + if (mSamplerMode != sampler) { + return false; + } + } else { + mHost = host; + mTexture = source; + mOpacity = opacity; + mSamplerMode = sampler; + } + MOZ_ASSERT(!mTexture->AsBigImageIterator()); + MOZ_ASSERT(!(mHost->GetFlags() & TextureFlags::NON_PREMULTIPLIED)); + MOZ_ASSERT(!(mHost->GetFlags() & TextureFlags::ORIGIN_BOTTOM_LEFT)); + + Txn txn(this); + + Info info(aItem, layer); + if (!AddItems(txn, info, layer->GetRenderRegion())) { + return false; + } + return txn.Commit(); +} + +void VideoRenderPass::SetupPipeline() { + YUVColorSpace colorSpace = YUVColorSpace::UNKNOWN; + switch (mHost->GetReadFormat()) { + case SurfaceFormat::YUV: + case SurfaceFormat::NV12: + case SurfaceFormat::P010: + case SurfaceFormat::P016: + colorSpace = mHost->GetYUVColorSpace(); + break; + default: + MOZ_ASSERT_UNREACHABLE("Unexpected surface format in VideoRenderPass"); + break; + } + MOZ_ASSERT(colorSpace != YUVColorSpace::UNKNOWN); + + RefPtr<MLGBuffer> ps1 = mDevice->GetBufferForColorSpace(colorSpace); + if (!ps1) { + return; + } + + RefPtr<MLGBuffer> ps2 = + mDevice->GetBufferForColorDepthCoefficient(mHost->GetColorDepth()); + if (!ps2) { + return; + } + + if (mGeometry == GeometryMode::UnitQuad) { + mDevice->SetVertexShader(VertexShaderID::TexturedQuad); + } else { + mDevice->SetVertexShader(VertexShaderID::TexturedVertex); + } + + switch (mHost->GetReadFormat()) { + case SurfaceFormat::YUV: { + if (colorSpace == YUVColorSpace::Identity) { + if (mGeometry == GeometryMode::UnitQuad) + mDevice->SetPixelShader(PixelShaderID::TexturedQuadIdentityIMC4); + else + mDevice->SetPixelShader(PixelShaderID::TexturedVertexIdentityIMC4); + } else { + if (mGeometry == GeometryMode::UnitQuad) + mDevice->SetPixelShader(PixelShaderID::TexturedQuadIMC4); + else + mDevice->SetPixelShader(PixelShaderID::TexturedVertexIMC4); + } + mDevice->SetPSTexturesYUV(0, mTexture); + break; + } + case SurfaceFormat::NV12: + case SurfaceFormat::P010: + case SurfaceFormat::P016: + if (mGeometry == GeometryMode::UnitQuad) + mDevice->SetPixelShader(PixelShaderID::TexturedQuadNV12); + else + mDevice->SetPixelShader(PixelShaderID::TexturedVertexNV12); + mDevice->SetPSTexturesNV12(0, mTexture); + break; + default: + MOZ_ASSERT_UNREACHABLE("Unknown video format"); + break; + } + + mDevice->SetSamplerMode(kDefaultSamplerSlot, mSamplerMode); + mDevice->SetPSConstantBuffer(1, ps1); + mDevice->SetPSConstantBuffer(2, ps2); +} + +RenderViewPass::RenderViewPass(FrameBuilder* aBuilder, const ItemInfo& aItem) + : TexturedRenderPass(aBuilder, aItem), mParentView(nullptr) { + mAssignedLayer = aItem.layer->AsContainerLayerMLGPU(); + + CompositionOp blendOp = mAssignedLayer->GetMixBlendMode(); + if (BlendOpIsMixBlendMode(blendOp)) { + mBlendMode = Some(blendOp); + } + + if (mBlendMode) { + // We do not have fast-path rect shaders for blending. + SetGeometry(aItem, GeometryMode::Polygon); + } else { + SetDefaultGeometry(aItem); + } +} + +bool RenderViewPass::AddToPass(LayerMLGPU* aLayer, ItemInfo& aItem) { + // We bake in the layer ahead of time, which also guarantees the blend mode + // is baked in, as well as the geometry requirement. 
+ if (mAssignedLayer != aLayer) { + return false; + } + + mSource = mAssignedLayer->GetRenderTarget(); + if (!mSource) { + return false; + } + + mParentView = aItem.view; + + Txn txn(this); + + IntPoint offset = mAssignedLayer->GetTargetOffset(); + IntSize size = mAssignedLayer->GetTargetSize(); + + // Clamp the visible region to the texture size. + nsIntRegion visible = mAssignedLayer->GetRenderRegion().ToUnknownRegion(); + visible.AndWith(IntRect(offset, size)); + + Info info(aItem, mAssignedLayer); + if (!AddItems(txn, info, visible)) { + return false; + } + return txn.Commit(); +} + +float RenderViewPass::GetOpacity() const { + return mAssignedLayer->GetLayer()->GetEffectiveOpacity(); +} + +bool RenderViewPass::OnPrepareBuffers() { + if (mBlendMode && !PrepareBlendState()) { + return false; + } + return true; +} + +static inline PixelShaderID GetShaderForBlendMode(CompositionOp aOp) { + switch (aOp) { + case CompositionOp::OP_MULTIPLY: + return PixelShaderID::BlendMultiply; + case CompositionOp::OP_SCREEN: + return PixelShaderID::BlendScreen; + case CompositionOp::OP_OVERLAY: + return PixelShaderID::BlendOverlay; + case CompositionOp::OP_DARKEN: + return PixelShaderID::BlendDarken; + case CompositionOp::OP_LIGHTEN: + return PixelShaderID::BlendLighten; + case CompositionOp::OP_COLOR_DODGE: + return PixelShaderID::BlendColorDodge; + case CompositionOp::OP_COLOR_BURN: + return PixelShaderID::BlendColorBurn; + case CompositionOp::OP_HARD_LIGHT: + return PixelShaderID::BlendHardLight; + case CompositionOp::OP_SOFT_LIGHT: + return PixelShaderID::BlendSoftLight; + case CompositionOp::OP_DIFFERENCE: + return PixelShaderID::BlendDifference; + case CompositionOp::OP_EXCLUSION: + return PixelShaderID::BlendExclusion; + case CompositionOp::OP_HUE: + return PixelShaderID::BlendHue; + case CompositionOp::OP_SATURATION: + return PixelShaderID::BlendSaturation; + case CompositionOp::OP_COLOR: + return PixelShaderID::BlendColor; + case CompositionOp::OP_LUMINOSITY: + return PixelShaderID::BlendLuminosity; + default: + MOZ_ASSERT_UNREACHABLE("Unexpected blend mode"); + return PixelShaderID::TexturedVertexRGBA; + } +} + +bool RenderViewPass::PrepareBlendState() { + Rect visibleRect( + mAssignedLayer->GetRenderRegion().GetBounds().ToUnknownRect()); + IntRect clipRect(mAssignedLayer->GetComputedClipRect().ToUnknownRect()); + const Matrix4x4& transform = + mAssignedLayer->GetLayer()->GetEffectiveTransformForBuffer(); + + // Note that we must use our parent RenderView for this calculation, + // since we're copying the backdrop, not our actual local target. 
+ IntRect rtRect(mParentView->GetTargetOffset(), mParentView->GetSize()); + + Matrix4x4 backdropTransform; + mBackdropCopyRect = ComputeBackdropCopyRect(visibleRect, clipRect, transform, + rtRect, &backdropTransform); + + AutoBufferUpload<BlendVertexShaderConstants> cb; + if (!mDevice->GetSharedVSBuffer()->Allocate(&mBlendConstants, &cb)) { + return false; + } + memcpy(cb->backdropTransform, &backdropTransform._11, 64); + return true; +} + +void RenderViewPass::SetupPipeline() { + if (mBlendMode) { + RefPtr<MLGRenderTarget> backdrop = mParentView->GetRenderTarget(); + MOZ_ASSERT(mDevice->GetRenderTarget() == backdrop); + + RefPtr<MLGTexture> copy = mDevice->CreateTexture( + mBackdropCopyRect.Size(), SurfaceFormat::B8G8R8A8, MLGUsage::Default, + MLGTextureFlags::ShaderResource); + if (!copy) { + return; + } + + mDevice->CopyTexture(copy, IntPoint(0, 0), backdrop->GetTexture(), + mBackdropCopyRect); + + MOZ_ASSERT(mGeometry == GeometryMode::Polygon); + mDevice->SetVertexShader(VertexShaderID::BlendVertex); + mDevice->SetPixelShader(GetShaderForBlendMode(mBlendMode.value())); + mDevice->SetVSConstantBuffer(kBlendConstantBufferSlot, &mBlendConstants); + mDevice->SetPSTexture(1, copy); + } else { + if (mGeometry == GeometryMode::UnitQuad) { + mDevice->SetVertexShader(VertexShaderID::TexturedQuad); + mDevice->SetPixelShader(PixelShaderID::TexturedQuadRGBA); + } else { + mDevice->SetVertexShader(VertexShaderID::TexturedVertex); + mDevice->SetPixelShader(PixelShaderID::TexturedVertexRGBA); + } + } + + mDevice->SetPSTexture(0, mSource->GetTexture()); + mDevice->SetSamplerMode(kDefaultSamplerSlot, SamplerMode::LinearClamp); +} + +void RenderViewPass::ExecuteRendering() { + if (mAssignedLayer->NeedsSurfaceCopy()) { + RenderWithBackdropCopy(); + return; + } + + TexturedRenderPass::ExecuteRendering(); +} + +void RenderViewPass::RenderWithBackdropCopy() { + MOZ_ASSERT(mAssignedLayer->NeedsSurfaceCopy()); + + DebugOnly<Matrix> transform2d; + const Matrix4x4& transform = mAssignedLayer->GetEffectiveTransform(); + MOZ_ASSERT(transform.Is2D(&transform2d) && + !gfx::ThebesMatrix(transform2d).HasNonIntegerTranslation()); + + IntPoint translation = IntPoint::Truncate(transform._41, transform._42); + + RenderViewMLGPU* childView = mAssignedLayer->GetRenderView(); + + IntRect visible = + mAssignedLayer->GetRenderRegion().GetBounds().ToUnknownRect(); + IntRect sourceRect = visible + translation - mParentView->GetTargetOffset(); + IntPoint destPoint = visible.TopLeft() - childView->GetTargetOffset(); + + RefPtr<MLGTexture> dest = mAssignedLayer->GetRenderTarget()->GetTexture(); + RefPtr<MLGTexture> source = mParentView->GetRenderTarget()->GetTexture(); + + // Clamp the source rect to the source texture size. + sourceRect = sourceRect.Intersect(IntRect(IntPoint(0, 0), source->GetSize())); + + // Clamp the source rect to the destination texture size. 
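+ // Shrinking the destination rect may shrink the copy, so the source rect is re-clamped to the destination size afterwards.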
+ IntRect destRect(destPoint, sourceRect.Size()); + destRect = destRect.Intersect(IntRect(IntPoint(0, 0), dest->GetSize())); + sourceRect = + sourceRect.Intersect(IntRect(sourceRect.TopLeft(), destRect.Size())); + + mDevice->CopyTexture(dest, destPoint, source, sourceRect); + childView->RenderAfterBackdropCopy(); + mParentView->RestoreDeviceState(); + TexturedRenderPass::ExecuteRendering(); +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/RenderPassMLGPU.h b/gfx/layers/mlgpu/RenderPassMLGPU.h new file mode 100644 index 0000000000..55739953ea --- /dev/null +++ b/gfx/layers/mlgpu/RenderPassMLGPU.h @@ -0,0 +1,439 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZILLA_GFX_RENDERPASSMLGPU_H +#define MOZILLA_GFX_RENDERPASSMLGPU_H + +#include "LayerMLGPU.h" +#include "LayerManagerMLGPU.h" +#include "ShaderDefinitionsMLGPU.h" +#include "SharedBufferMLGPU.h" +#include "StagingBuffer.h" + +namespace mozilla { +namespace layers { + +using namespace mlg; + +class RenderViewMLGPU; + +enum class RenderPassType { + ClearView, + SolidColor, + SingleTexture, + RenderView, + Video, + ComponentAlpha, + Unknown +}; + +enum class RenderOrder { + // Used for all items when not using a depth buffer. Otherwise, used for + // items that may draw transparent pixels. + BackToFront, + + // Only used when the depth buffer is enabled, and only for items that are + // guaranteed to only draw opaque pixels. + FrontToBack +}; + +static const uint32_t kInvalidResourceIndex = uint32_t(-1); + +struct ItemInfo final { + ItemInfo(FrameBuilder* aBuilder, RenderViewMLGPU* aView, LayerMLGPU* aLayer, + int32_t aSortOrder, const gfx::IntRect& aBounds, + Maybe<gfx::Polygon>&& aGeometry); + + // Return true if a layer can be clipped by the vertex shader; false + // otherwise. Any kind of textured mask or non-rectilinear transform + // will cause this to return false. + bool HasRectTransformAndClip() const { + return rectilinear && !layer->GetMask(); + } + + RenderViewMLGPU* view; + LayerMLGPU* layer; + RenderPassType type; + uint32_t layerIndex; + int32_t sortOrder; + gfx::IntRect bounds; + RenderOrder renderOrder; + Maybe<gfx::Polygon> geometry; + + // Set only when the transform is a 2D integer translation. + Maybe<gfx::IntPoint> translation; + + // Set when the item bounds will occlude anything below it. + bool opaque; + + // Set when the item's transform is 2D and rectilinear. + bool rectilinear; +}; + +// Base class for anything that can render in a batch to the GPU. +class RenderPassMLGPU { + NS_INLINE_DECL_REFCOUNTING(RenderPassMLGPU) + + public: + static RenderPassType GetPreferredPassType(FrameBuilder* aBuilder, + const ItemInfo& aInfo); + + static RefPtr<RenderPassMLGPU> CreatePass(FrameBuilder* aBuilder, + const ItemInfo& aInfo); + + // Return true if this pass is compatible with the given item, false + // otherwise. This does not guarantee the pass will accept the item, + // but does guarantee we can try. + virtual bool IsCompatible(const ItemInfo& aItem); + + virtual RenderPassType GetType() const = 0; + + // Return true if the layer was compatible with and added to this pass, + // false otherwise. + bool AcceptItem(ItemInfo& aInfo); + + // Prepare constants buffers and textures. 
+ virtual void PrepareForRendering(); + + // Execute this render pass to the currently selected surface. + virtual void ExecuteRendering() = 0; + + virtual Maybe<MLGBlendState> GetBlendState() const { return Nothing(); } + + size_t GetLayerBufferIndex() const { return mLayerBufferIndex; } + Maybe<uint32_t> GetMaskRectBufferIndex() const { + return mMaskRectBufferIndex == kInvalidResourceIndex + ? Nothing() + : Some(mMaskRectBufferIndex); + } + + // Returns true if this pass overlaps the affected region of an item. This + // only ever returns true for transparent items and transparent batches, + // and should not be used otherwise. + bool Intersects(const ItemInfo& aItem); + + // Returns true if pass has been successfully prepared. + bool IsPrepared() const { return mPrepared; } + + protected: + RenderPassMLGPU(FrameBuilder* aBuilder, const ItemInfo& aItem); + virtual ~RenderPassMLGPU(); + + // Return true if the item was consumed, false otherwise. + virtual bool AddToPass(LayerMLGPU* aItem, ItemInfo& aInfo) = 0; + + protected: + enum class GeometryMode { Unknown, UnitQuad, Polygon }; + + protected: + FrameBuilder* mBuilder; + RefPtr<MLGDevice> mDevice; + size_t mLayerBufferIndex; + size_t mMaskRectBufferIndex; + gfx::IntRegion mAffectedRegion; + bool mPrepared; +}; + +// Shader-based render passes execute a draw call, vs. non-shader passes that +// use non-shader APIs (like ClearView). +class ShaderRenderPass : public RenderPassMLGPU { + public: + ShaderRenderPass(FrameBuilder* aBuilder, const ItemInfo& aItem); + + // Used by ShaderDefinitions for writing traits. + VertexStagingBuffer* GetInstances() { return &mInstances; } + + bool IsCompatible(const ItemInfo& aItem) override; + void PrepareForRendering() override; + void ExecuteRendering() override; + + Maybe<MLGBlendState> GetBlendState() const override { + return Some(MLGBlendState::Over); + } + + protected: + // If this batch has a uniform opacity, return it here. Otherwise this should + // return 1.0. + virtual float GetOpacity() const = 0; + + // Set any components of the pipeline that won't be handled by + // ExecuteRendering. This is called only once even if multiple draw calls + // are issued. + virtual void SetupPipeline() = 0; + + protected: + // Set the geometry this pass will use. This must be called by every + // derived constructor. Use GeometryMode::Unknown to pick the default + // behavior: UnitQuads for rectilinear transform+clips, and polygons + // otherwise. + void SetGeometry(const ItemInfo& aItem, GeometryMode aMode); + + void SetDefaultGeometry(const ItemInfo& aItem) { + SetGeometry(aItem, GeometryMode::Unknown); + } + + // Called after PrepareForRendering() has finished. If this returns false, + // PrepareForRendering() will return false. + virtual bool OnPrepareBuffers() { return true; } + + // Prepare the mask/opacity buffer bound in most pixel shaders. + bool SetupPSBuffer0(float aOpacity); + + bool HasMask() const { return !!mMask; } + MaskOperation* GetMask() const { return mMask; } + + protected: + GeometryMode mGeometry; + RefPtr<MaskOperation> mMask; + bool mHasRectTransformAndClip; + + VertexStagingBuffer mInstances; + VertexBufferSection mInstanceBuffer; + + ConstantBufferSection mPSBuffer0; +}; + +// This contains various helper functions for building vertices and shader +// inputs for layers. 
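+// The Traits type parameter (e.g. ColorTraits or TexturedTraits, defined in +// ShaderDefinitionsMLGPU.h) supplies the per-item vertex data that each derived pass generates.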
+template <typename Traits> +class BatchRenderPass : public ShaderRenderPass { + public: + BatchRenderPass(FrameBuilder* aBuilder, const ItemInfo& aItem) + : ShaderRenderPass(aBuilder, aItem) {} + + protected: + // It is tricky to determine ahead of time whether or not we'll have enough + // room in our buffers to hold all draw commands for a layer, especially + // since layers can have multiple draw rects. We don't want to draw one rect, + // reject the item, then redraw the same rect again in another batch. + // To deal with this we use a transaction approach and reject the transaction + // if we couldn't add everything. + class Txn final { + public: + explicit Txn(BatchRenderPass* aPass) + : mPass(aPass), mPrevInstancePos(aPass->mInstances.GetPosition()) {} + + bool Add(const Traits& aTraits) { + if (!AddImpl(aTraits)) { + return Fail(); + } + return true; + } + + // Add an item based on a draw rect, layer, and optional geometry. This is + // defined in RenderPassMLGPU-inl.h, since it needs access to + // ShaderDefinitionsMLGPU-inl.h. + bool AddImpl(const Traits& aTraits); + + bool Fail() { + MOZ_ASSERT(!mStatus.isSome() || !mStatus.value()); + mStatus = Some(false); + return false; + } + + bool Commit() { + MOZ_ASSERT(!mStatus.isSome() || !mStatus.value()); + if (mStatus.isSome()) { + return false; + } + mStatus = Some(true); + return true; + } + + ~Txn() { + if (!mStatus.isSome() || !mStatus.value()) { + mPass->mInstances.RestorePosition(mPrevInstancePos); + } + } + + private: + BatchRenderPass* mPass; + VertexStagingBuffer::Position mPrevVertexPos; + VertexStagingBuffer::Position mPrevItemPos; + ConstantStagingBuffer::Position mPrevInstancePos; + Maybe<bool> mStatus; + }; +}; + +// Shaders which sample from a texture should inherit from this. +class TexturedRenderPass : public BatchRenderPass<TexturedTraits> { + public: + explicit TexturedRenderPass(FrameBuilder* aBuilder, const ItemInfo& aItem); + + protected: + struct Info final { + Info(const ItemInfo& aItem, PaintedLayerMLGPU* aLayer); + Info(const ItemInfo& aItem, TexturedLayerMLGPU* aLayer); + Info(const ItemInfo& aItem, ContainerLayerMLGPU* aLayer); + + const ItemInfo& item; + gfx::IntSize textureSize; + gfx::Point destOrigin; + Maybe<gfx::Size> scale; + bool decomposeIntoNoRepeatRects; + }; + + // Add a set of draw rects based on a visible region. The texture size and + // scaling factor are used to compute uv-coordinates. + // + // The origin is the offset from the draw rect to the layer bounds. You can + // also think of it as the translation from layer space into texture space, + // pre-scaling. For example, ImageLayers use the texture bounds as their + // draw rect, so the origin will be (0, 0). ContainerLayer intermediate + // surfaces, on the other hand, are relative to the target offset of the + // layer. In all cases the visible region may be partially occluded, so + // knowing the true origin is important. + template <typename RegionType> + bool AddItems(Txn& aTxn, const Info& aInfo, const RegionType& aDrawRegion) { + for (auto iter = aDrawRegion.RectIter(); !iter.Done(); iter.Next()) { + gfx::Rect drawRect = gfx::Rect(iter.Get().ToUnknownRect()); + if (!AddItem(aTxn, aInfo, drawRect)) { + return false; + } + } + return true; + } + + private: + // Add a draw instance to the given destination rect. Texture coordinates + // are built from the given texture size, optional scaling factor, and + // texture origin relative to the draw rect. 
This will ultimately call + // AddClippedItem, potentially clipping the draw rect if needed. + bool AddItem(Txn& aTxn, const Info& aInfo, const gfx::Rect& aDrawRect); + + // Add an item that has gone through any necessary clipping already. This + // is the final destination for handling textured items. + bool AddClippedItem(Txn& aTxn, const Info& aInfo, const gfx::Rect& aDrawRect); + + protected: + TextureFlags mTextureFlags; +}; + +// This is only available when MLGDevice::CanUseClearView returns true. +class ClearViewPass final : public RenderPassMLGPU { + public: + ClearViewPass(FrameBuilder* aBuilder, const ItemInfo& aItem); + + bool IsCompatible(const ItemInfo& aItem) override; + void ExecuteRendering() override; + + RenderPassType GetType() const override { return RenderPassType::ClearView; } + + private: + bool AddToPass(LayerMLGPU* aItem, ItemInfo& aInfo) override; + + private: + // Note: Not a RefPtr since this would create a cycle. + RenderViewMLGPU* mView; + gfx::DeviceColor mColor; + nsTArray<gfx::IntRect> mRects; +}; + +// SolidColorPass is used when ClearViewPass is not available, or when +// the layer has masks, or subpixel or complex transforms. +class SolidColorPass final : public BatchRenderPass<ColorTraits> { + public: + explicit SolidColorPass(FrameBuilder* aBuilder, const ItemInfo& aItem); + + RenderPassType GetType() const override { return RenderPassType::SolidColor; } + + private: + bool AddToPass(LayerMLGPU* aItem, ItemInfo& aInfo) override; + void SetupPipeline() override; + float GetOpacity() const override; +}; + +class SingleTexturePass final : public TexturedRenderPass { + public: + explicit SingleTexturePass(FrameBuilder* aBuilder, const ItemInfo& aItem); + + RenderPassType GetType() const override { + return RenderPassType::SingleTexture; + } + + private: + bool AddToPass(LayerMLGPU* aItem, ItemInfo& aInfo) override; + void SetupPipeline() override; + float GetOpacity() const override { return mOpacity; } + Maybe<MLGBlendState> GetBlendState() const override; + + private: + RefPtr<TextureSource> mTexture; + SamplerMode mSamplerMode; + float mOpacity; +}; + +class ComponentAlphaPass final : public TexturedRenderPass { + public: + explicit ComponentAlphaPass(FrameBuilder* aBuilder, const ItemInfo& aItem); + + RenderPassType GetType() const override { + return RenderPassType::ComponentAlpha; + } + + private: + bool AddToPass(LayerMLGPU* aItem, ItemInfo& aInfo) override; + void SetupPipeline() override; + float GetOpacity() const override; + Maybe<MLGBlendState> GetBlendState() const override { + return Some(MLGBlendState::ComponentAlpha); + } + + private: + float mOpacity; + SamplerMode mSamplerMode; + RefPtr<TextureSource> mTextureOnBlack; + RefPtr<TextureSource> mTextureOnWhite; +}; + +class VideoRenderPass final : public TexturedRenderPass { + public: + explicit VideoRenderPass(FrameBuilder* aBuilder, const ItemInfo& aItem); + + RenderPassType GetType() const override { return RenderPassType::Video; } + + private: + bool AddToPass(LayerMLGPU* aItem, ItemInfo& aInfo) override; + void SetupPipeline() override; + float GetOpacity() const override { return mOpacity; } + + private: + RefPtr<TextureHost> mHost; + RefPtr<TextureSource> mTexture; + SamplerMode mSamplerMode; + float mOpacity; +}; + +class RenderViewPass final : public TexturedRenderPass { + public: + RenderViewPass(FrameBuilder* aBuilder, const ItemInfo& aItem); + + RenderPassType GetType() const override { return RenderPassType::RenderView; } + + private: + bool AddToPass(LayerMLGPU* aItem, 
ItemInfo& aInfo) override; + void SetupPipeline() override; + bool OnPrepareBuffers() override; + void ExecuteRendering() override; + float GetOpacity() const override; + bool PrepareBlendState(); + void RenderWithBackdropCopy(); + + private: + ConstantBufferSection mBlendConstants; + ContainerLayerMLGPU* mAssignedLayer; + RefPtr<MLGRenderTarget> mSource; + // Note: we don't use RefPtr here since that would cause a cycle. RenderViews + // and RenderPasses are both scoped to the frame anyway. + RenderViewMLGPU* mParentView; + gfx::IntRect mBackdropCopyRect; + Maybe<gfx::CompositionOp> mBlendMode; +}; + +} // namespace layers +} // namespace mozilla + +#endif diff --git a/gfx/layers/mlgpu/RenderViewMLGPU.cpp b/gfx/layers/mlgpu/RenderViewMLGPU.cpp new file mode 100644 index 0000000000..f6aceadeb5 --- /dev/null +++ b/gfx/layers/mlgpu/RenderViewMLGPU.cpp @@ -0,0 +1,549 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "RenderViewMLGPU.h" +#include "ContainerLayerMLGPU.h" +#include "FrameBuilder.h" +#include "mozilla/StaticPrefs_layers.h" +#include "LayersHelpers.h" +#include "MLGDevice.h" +#include "RenderPassMLGPU.h" +#include "ShaderDefinitionsMLGPU.h" +#include "Units.h" +#include "UnitTransforms.h" +#include "UtilityMLGPU.h" + +namespace mozilla { +namespace layers { + +using namespace gfx; + +RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder, + MLGRenderTarget* aTarget, + const nsIntRegion& aInvalidRegion) + : RenderViewMLGPU(aBuilder, nullptr) { + mTarget = aTarget; + mInvalidBounds = aInvalidRegion.GetBounds(); + + // The clear region on the layer manager is the area that must be clear after + // we finish drawing. + mPostClearRegion = aBuilder->GetManager()->GetRegionToClear(); + + // Clamp the post-clear region to the invalid bounds, since clears don't go + // through the scissor rect if using ClearView. + mPostClearRegion.AndWith(mInvalidBounds); + + // Since the post-clear will occlude everything, we include it in the final + // opaque area. 
+ mOccludedRegion.OrWith(ViewAs<LayerPixel>( + mPostClearRegion, + PixelCastJustification::RenderTargetIsParentLayerForRoot)); + + AL_LOG("RenderView %p root with invalid area %s, clear area %s\n", this, + Stringify(mInvalidBounds).c_str(), + Stringify(mPostClearRegion).c_str()); +} + +RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder, + ContainerLayerMLGPU* aContainer, + RenderViewMLGPU* aParent) + : RenderViewMLGPU(aBuilder, aParent) { + mContainer = aContainer; + mTargetOffset = aContainer->GetTargetOffset(); + mInvalidBounds = aContainer->GetInvalidRect(); + MOZ_ASSERT(!mInvalidBounds.IsEmpty()); + + AL_LOG("RenderView %p starting with container %p and invalid area %s\n", this, + aContainer->GetLayer(), Stringify(mInvalidBounds).c_str()); + + mContainer->SetRenderView(this); +} + +RenderViewMLGPU::RenderViewMLGPU(FrameBuilder* aBuilder, + RenderViewMLGPU* aParent) + : mBuilder(aBuilder), + mDevice(aBuilder->GetDevice()), + mParent(aParent), + mContainer(nullptr), + mFinishedBuilding(false), + mCurrentLayerBufferIndex(kInvalidResourceIndex), + mCurrentMaskRectBufferIndex(kInvalidResourceIndex), + mCurrentDepthMode(MLGDepthTestMode::Disabled), + mNextSortIndex(1), + mUseDepthBuffer( + StaticPrefs::layers_mlgpu_enable_depth_buffer_AtStartup()), + mDepthBufferNeedsClear(false) { + if (aParent) { + aParent->AddChild(this); + } +} + +RenderViewMLGPU::~RenderViewMLGPU() { + for (const auto& child : mChildren) { + child->mParent = nullptr; + } +} + +IntSize RenderViewMLGPU::GetSize() const { + MOZ_ASSERT(mFinishedBuilding); + return mTarget->GetSize(); +} + +MLGRenderTarget* RenderViewMLGPU::GetRenderTarget() const { + MOZ_ASSERT(mFinishedBuilding); + return mTarget; +} + +void RenderViewMLGPU::AddChild(RenderViewMLGPU* aParent) { + mChildren.push_back(aParent); +} + +void RenderViewMLGPU::Render() { + // We render views depth-first to minimize render target switching. + for (const auto& child : mChildren) { + child->Render(); + } + + // If the view requires a surface copy (of its backdrop), then we delay + // rendering it until it is added to a batch. + if (mContainer && mContainer->NeedsSurfaceCopy()) { + return; + } + ExecuteRendering(); +} + +void RenderViewMLGPU::RenderAfterBackdropCopy() { + MOZ_ASSERT(mContainer && mContainer->NeedsSurfaceCopy()); + + // Update the invalid bounds based on the container's visible region. This + // of course won't affect the prepared pipeline, but it will change the + // scissor rect in SetDeviceState. + mInvalidBounds = mContainer->GetRenderRegion().GetBounds().ToUnknownRect() - + GetTargetOffset(); + + ExecuteRendering(); +} + +void RenderViewMLGPU::FinishBuilding() { + MOZ_ASSERT(!mFinishedBuilding); + mFinishedBuilding = true; + + if (mContainer) { + MOZ_ASSERT(!mTarget); + + MLGRenderTargetFlags flags = MLGRenderTargetFlags::Default; + if (mUseDepthBuffer) { + flags |= MLGRenderTargetFlags::ZBuffer; + } + mTarget = mContainer->UpdateRenderTarget(mDevice, flags); + } +} + +void RenderViewMLGPU::AddItem(LayerMLGPU* aItem, const IntRect& aRect, + Maybe<Polygon>&& aGeometry) { + AL_LOG("RenderView %p analyzing layer %p\n", this, aItem->GetLayer()); + + // If the item is not visible at all, skip it. + if (aItem->GetComputedOpacity() == 0.0f) { + AL_LOG("RenderView %p culling item %p with no opacity\n", this, + aItem->GetLayer()); + return; + } + + // When using the depth buffer, the z-index for items is important. + // + // Sort order starts at 1 and goes to positive infinity, with smaller values + // being closer to the screen. 
Our viewport is the same, with anything + // outside of [0.0, 1.0] being culled, and lower values occluding higher + // values. To make this work our projection transform scales the z-axis. + // Note that we do not use 0 as a sorting index (when depth-testing is + // enabled) because this would result in a z-value of 1.0, which would be + // culled. + ItemInfo info(mBuilder, this, aItem, mNextSortIndex++, aRect, + std::move(aGeometry)); + + // If the item is not visible, or we can't add it to the layer constant + // buffer for some reason, bail out. + if (!UpdateVisibleRegion(info) || !mBuilder->AddLayerToConstantBuffer(info)) { + AL_LOG("RenderView %p culled item %p!\n", this, aItem->GetLayer()); + return; + } + + // We support all layer types now. + MOZ_ASSERT(info.type != RenderPassType::Unknown); + + if (info.renderOrder == RenderOrder::FrontToBack) { + AddItemFrontToBack(aItem, info); + } else { + AddItemBackToFront(aItem, info); + } +} + +bool RenderViewMLGPU::UpdateVisibleRegion(ItemInfo& aItem) { + // If the item has some kind of complex transform, we perform a very + // simple occlusion test and move on. When using a depth buffer we skip + // CPU-based occlusion culling as well, since the GPU will do most of our + // culling work for us. + if (mUseDepthBuffer || !aItem.translation || + !StaticPrefs::layers_mlgpu_enable_cpu_occlusion_AtStartup()) { + // Update the render region even if we won't compute visibility, since some + // layer types (like Canvas and Image) need to have the visible region + // clamped. + LayerIntRegion region = aItem.layer->GetShadowVisibleRegion(); + aItem.layer->SetRenderRegion(std::move(region)); + + AL_LOG("RenderView %p simple occlusion test, bounds=%s, translation?=%d\n", + this, Stringify(aItem.bounds).c_str(), aItem.translation ? 1 : 0); + return mInvalidBounds.Intersects(aItem.bounds); + } + + MOZ_ASSERT(aItem.rectilinear); + + AL_LOG("RenderView %p starting visibility tests:\n", this); + AL_LOG(" occluded=%s\n", Stringify(mOccludedRegion).c_str()); + + // Compute the translation into render target space. + LayerIntPoint translation = LayerIntPoint::FromUnknownPoint( + aItem.translation.value() - mTargetOffset); + AL_LOG(" translation=%s\n", Stringify(translation).c_str()); + + IntRect clip = aItem.layer->GetComputedClipRect().ToUnknownRect(); + AL_LOG(" clip=%s\n", Stringify(clip).c_str()); + + LayerIntRegion region = aItem.layer->GetShadowVisibleRegion(); + region.MoveBy(translation); + AL_LOG(" effective-visible=%s\n", Stringify(region).c_str()); + + region.SubOut(mOccludedRegion); + region.AndWith(LayerIntRect::FromUnknownRect(mInvalidBounds)); + region.AndWith(LayerIntRect::FromUnknownRect(clip)); + if (region.IsEmpty()) { + return false; + } + + // Move the visible region back into layer space. + region.MoveBy(-translation); + AL_LOG(" new-local-visible=%s\n", Stringify(region).c_str()); + + aItem.layer->SetRenderRegion(std::move(region)); + + // Apply the new occluded area. We do another dance with the translation to + // avoid copying the region. We do this after the SetRenderRegion call to + // accommodate the possibility of a layer changing its visible region. + if (aItem.opaque) { + mOccludedRegion.MoveBy(-translation); + mOccludedRegion.OrWith(aItem.layer->GetRenderRegion()); + mOccludedRegion.MoveBy(translation); + AL_LOG(" new-occluded=%s\n", Stringify(mOccludedRegion).c_str()); + + // If the occluded region gets too complicated, we reset it.
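+ // Subtracting against a region with many rects gets expensive, so past 32 rects we give up on occlusion culling rather than keep paying that cost.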
+ if (mOccludedRegion.GetNumRects() >= 32) { + mOccludedRegion.SetEmpty(); + AL_LOG(" clear-occluded, too many rects\n"); + } + } + return true; +} + +void RenderViewMLGPU::AddItemFrontToBack(LayerMLGPU* aLayer, ItemInfo& aItem) { + // We receive items in front-to-back order. Ideally we want to push items + // as far back into batches as possible, to ensure the GPU can do a good + // job at culling. However we also want to make sure we actually batch + // items versus drawing one primitive per pass. + // + // As a compromise we look at the 3 most recent batches and then give up. + // This can be tweaked in the future. + static const size_t kMaxSearch = 3; + size_t iterations = 0; + for (auto iter = mFrontToBack.rbegin(); iter != mFrontToBack.rend(); iter++) { + RenderPassMLGPU* pass = (*iter); + if (pass->IsCompatible(aItem) && pass->AcceptItem(aItem)) { + AL_LOG("RenderView %p added layer %p to pass %p (%d)\n", this, + aLayer->GetLayer(), pass, int(pass->GetType())); + return; + } + if (++iterations > kMaxSearch) { + break; + } + } + + RefPtr<RenderPassMLGPU> pass = RenderPassMLGPU::CreatePass(mBuilder, aItem); + if (!pass || !pass->AcceptItem(aItem)) { + MOZ_ASSERT_UNREACHABLE("Could not build a pass for item!"); + return; + } + AL_LOG("RenderView %p added layer %p to new pass %p (%d)\n", this, + aLayer->GetLayer(), pass.get(), int(pass->GetType())); + + mFrontToBack.push_back(pass); +} + +void RenderViewMLGPU::AddItemBackToFront(LayerMLGPU* aLayer, ItemInfo& aItem) { + // We receive layers in front-to-back order, but there are two cases when we + // actually draw back-to-front: when the depth buffer is disabled, or when + // using the depth buffer and the item has transparent pixels (and therefore + // requires blending). In these cases we will build vertex and constant + // buffers in reverse, as well as execute batches in reverse, to ensure the + // correct ordering. + // + // Note: We limit the number of batches we search through, since it's better + // to add new draw calls than spend too much time finding compatible + // batches further down. + static const size_t kMaxSearch = 10; + size_t iterations = 0; + for (auto iter = mBackToFront.begin(); iter != mBackToFront.end(); iter++) { + RenderPassMLGPU* pass = (*iter); + if (pass->IsCompatible(aItem) && pass->AcceptItem(aItem)) { + AL_LOG("RenderView %p added layer %p to pass %p (%d)\n", this, + aLayer->GetLayer(), pass, int(pass->GetType())); + return; + } + if (pass->Intersects(aItem)) { + break; + } + if (++iterations > kMaxSearch) { + break; + } + } + + RefPtr<RenderPassMLGPU> pass = RenderPassMLGPU::CreatePass(mBuilder, aItem); + if (!pass || !pass->AcceptItem(aItem)) { + MOZ_ASSERT_UNREACHABLE("Could not build a pass for item!"); + return; + } + AL_LOG("RenderView %p added layer %p to new pass %p (%d)\n", this, + aLayer->GetLayer(), pass.get(), int(pass->GetType())); + + mBackToFront.push_front(pass); +} + +void RenderViewMLGPU::Prepare() { + if (!mTarget) { + return; + } + + // Prepare front-to-back passes. These are only present when using the depth + // buffer, and they contain only opaque data. + for (RefPtr<RenderPassMLGPU>& pass : mFrontToBack) { + pass->PrepareForRendering(); + } + + // Prepare the Clear buffer, which will fill the render target with + // transparent pixels. This must happen before we set up world constants, + // since it can create new z-indices. + PrepareClears(); + + // Prepare the world constant buffer. This must be called after we've + // finished allocating all z-indices.
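+ // The projection below maps target-space pixels onto clip space: x in [0, width] goes to [-1, 1] and y in [0, height] goes to [1, -1] (y is flipped).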
+ { + WorldConstants vsConstants; + Matrix4x4 projection = Matrix4x4::Translation(-1.0, 1.0, 0.0); + projection.PreScale(2.0 / float(mTarget->GetSize().width), + 2.0 / float(mTarget->GetSize().height), 1.0f); + projection.PreScale(1.0f, -1.0f, 1.0f); + + memcpy(vsConstants.projection, &projection._11, 64); + vsConstants.targetOffset = Point(mTargetOffset); + vsConstants.sortIndexOffset = PrepareDepthBuffer(); + vsConstants.debugFrameNumber = + mBuilder->GetManager()->GetDebugFrameNumber(); + + SharedConstantBuffer* shared = mDevice->GetSharedVSBuffer(); + if (!shared->Allocate(&mWorldConstants, vsConstants)) { + return; + } + } + + // Prepare back-to-front passes. In depth buffer mode, these contain draw + // calls that might produce transparent pixels. When using CPU-based occlusion + // culling, all draw calls are back-to-front. + for (RefPtr<RenderPassMLGPU>& pass : mBackToFront) { + pass->PrepareForRendering(); + } + + // Now, process children. + for (const auto& iter : mChildren) { + iter->Prepare(); + } +} + +void RenderViewMLGPU::ExecuteRendering() { + if (!mTarget) { + return; + } + if (!mWorldConstants.IsValid()) { + gfxWarning() << "Failed to allocate constant buffer for world transform"; + return; + } + + SetDeviceState(); + + // If using the depth buffer, clear it (if needed) and enable writes. + if (mUseDepthBuffer) { + if (mDepthBufferNeedsClear) { + mDevice->ClearDepthBuffer(mTarget); + } + SetDepthTestMode(MLGDepthTestMode::Write); + } + + // Opaque items, rendered front-to-back. + for (auto iter = mFrontToBack.begin(); iter != mFrontToBack.end(); iter++) { + ExecutePass(*iter); + } + + if (mUseDepthBuffer) { + // From now on we might be rendering transparent pixels, so we disable + // writing to the z-buffer. + SetDepthTestMode(MLGDepthTestMode::ReadOnly); + } + + // Clear any pixels that are not occluded, and therefore might require + // blending. + mDevice->DrawClearRegion(mPreClear); + + // Render back-to-front passes. + for (auto iter = mBackToFront.begin(); iter != mBackToFront.end(); iter++) { + ExecutePass(*iter); + } + + // Make sure the post-clear area has no pixels. + if (!mPostClearRegion.IsEmpty()) { + mDevice->DrawClearRegion(mPostClear); + } + + // We repaint the entire invalid region, even if it is partially occluded. + // Thus it's safe for us to clear the invalid area here. If we ever switch + // to nsIntRegions, we will have to take the difference between the painted + // area and the invalid area. + if (mContainer) { + mContainer->ClearInvalidRect(); + } +} + +void RenderViewMLGPU::ExecutePass(RenderPassMLGPU* aPass) { + if (!aPass->IsPrepared()) { + return; + } + + // Change the layer buffer if needed. + if (aPass->GetLayerBufferIndex() != mCurrentLayerBufferIndex) { + mCurrentLayerBufferIndex = aPass->GetLayerBufferIndex(); + + ConstantBufferSection section = + mBuilder->GetLayerBufferByIndex(mCurrentLayerBufferIndex); + mDevice->SetVSConstantBuffer(kLayerBufferSlot, &section); + } + + // Change the mask rect buffer if needed.
+ if (aPass->GetMaskRectBufferIndex() && + aPass->GetMaskRectBufferIndex().value() != mCurrentMaskRectBufferIndex) { + mCurrentMaskRectBufferIndex = aPass->GetMaskRectBufferIndex().value(); + + ConstantBufferSection section = + mBuilder->GetMaskRectBufferByIndex(mCurrentMaskRectBufferIndex); + mDevice->SetVSConstantBuffer(kMaskBufferSlot, &section); + } + + aPass->ExecuteRendering(); +} + +void RenderViewMLGPU::SetDeviceState() { + // Note: we unbind slot 0 (which is where the render target could have been + // bound on a previous frame). Otherwise we trigger + // D3D11_DEVICE_PSSETSHADERRESOURCES_HAZARD. + mDevice->UnsetPSTexture(0); + mDevice->SetRenderTarget(mTarget); + mDevice->SetViewport(IntRect(IntPoint(0, 0), mTarget->GetSize())); + mDevice->SetScissorRect(Some(mInvalidBounds)); + mDevice->SetVSConstantBuffer(kWorldConstantBufferSlot, &mWorldConstants); +} + +void RenderViewMLGPU::SetDepthTestMode(MLGDepthTestMode aMode) { + mDevice->SetDepthTestMode(aMode); + mCurrentDepthMode = aMode; +} + +void RenderViewMLGPU::RestoreDeviceState() { + SetDeviceState(); + mDevice->SetDepthTestMode(mCurrentDepthMode); + mCurrentLayerBufferIndex = kInvalidResourceIndex; + mCurrentMaskRectBufferIndex = kInvalidResourceIndex; +} + +int32_t RenderViewMLGPU::PrepareDepthBuffer() { + if (!mUseDepthBuffer) { + return 0; + } + + // Rather than clear the depth buffer every frame, we offset z-indices each + // frame, starting with indices far away from the screen and moving toward + // the user each successive frame. This ensures that frames can re-use the + // depth buffer but never collide with previously written values. + // + // Once a frame runs out of sort indices, we finally clear the depth buffer + // and start over again. + + // Note: the lowest sort index (kDepthLimit) is always occluded since it will + // resolve to the clear value (kDepthLimit / kDepthLimit == 1.0). + // + // If we don't have any more indices to allocate, we need to clear the depth + // buffer and start fresh. + int32_t highestIndex = mTarget->GetLastDepthStart(); + if (highestIndex < mNextSortIndex) { + mDepthBufferNeedsClear = true; + highestIndex = kDepthLimit; + } + + // We should not have more than kDepthLimit layers to draw. The last known + // sort index might appear in the depth buffer and occlude something, so + // we subtract 1. This ensures all our indices will compare less than all + // old indices. + int32_t sortOffset = highestIndex - mNextSortIndex - 1; + MOZ_ASSERT(sortOffset >= 0); + + mTarget->SetLastDepthStart(sortOffset); + return sortOffset; +} + +void RenderViewMLGPU::PrepareClears() { + // We don't do any clearing if we're copying from a source backdrop. + if (mContainer && mContainer->NeedsSurfaceCopy()) { + return; + } + + // Get the list of rects to clear. If using the depth buffer, we don't + // care if it's accurate since the GPU will do occlusion testing for us. + // If not using the depth buffer, we subtract out the occluded region. + LayerIntRegion region = LayerIntRect::FromUnknownRect(mInvalidBounds); + if (!mUseDepthBuffer) { + // Don't let the clear region become too complicated. + region.SubOut(mOccludedRegion); + region.SimplifyOutward(kMaxClearViewRects); + } + + Maybe<int32_t> sortIndex; + if (mUseDepthBuffer) { + // Note that we use the lowest available sorting index, to ensure that when + // using the z-buffer, we don't draw over already-drawn content.
+ sortIndex = Some(mNextSortIndex++); + } + + nsTArray<IntRect> rects = ToRectArray(region); + mDevice->PrepareClearRegion(&mPreClear, std::move(rects), sortIndex); + + if (!mPostClearRegion.IsEmpty()) { + // Prepare the final clear as well. Note that we always do this clear at the + // very end, even when the depth buffer is enabled, so we don't bother + // setting a useful sorting index. If and when we try to ship the depth + // buffer, we would execute this clear earlier in the pipeline and give it + // the closest possible z-ordering to the screen. + nsTArray<IntRect> rects = ToRectArray(mPostClearRegion); + mDevice->PrepareClearRegion(&mPostClear, std::move(rects), Nothing()); + } +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/RenderViewMLGPU.h b/gfx/layers/mlgpu/RenderViewMLGPU.h new file mode 100644 index 0000000000..1ad6c20eda --- /dev/null +++ b/gfx/layers/mlgpu/RenderViewMLGPU.h @@ -0,0 +1,136 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_RenderViewMLGPU_h +#define mozilla_gfx_layers_mlgpu_RenderViewMLGPU_h + +#include "LayerManagerMLGPU.h" +#include "ClearRegionHelper.h" +#include "RenderPassMLGPU.h" +#include "Units.h" +#include <deque> + +namespace mozilla { +namespace layers { + +class FrameBuilder; +class ContainerLayerMLGPU; +class MLGRenderTarget; + +class RenderViewMLGPU { + public: + NS_INLINE_DECL_REFCOUNTING(RenderViewMLGPU) + + // Constructor for the widget render target. + RenderViewMLGPU(FrameBuilder* aBuilder, MLGRenderTarget* aTarget, + const nsIntRegion& aInvalidRegion); + + // Constructor for intermediate surfaces. + RenderViewMLGPU(FrameBuilder* aBuilder, ContainerLayerMLGPU* aContainer, + RenderViewMLGPU* aParent); + + void Prepare(); + void Render(); + void AddChild(RenderViewMLGPU* aParent); + void AddItem(LayerMLGPU* aItem, const gfx::IntRect& aBounds, + Maybe<gfx::Polygon>&& aGeometry); + void FinishBuilding(); + + const gfx::IntPoint& GetTargetOffset() const { return mTargetOffset; } + RenderViewMLGPU* GetParent() const { return mParent; } + bool HasDepthBuffer() const { return mUseDepthBuffer; } + + // Render after having previously delayed rendering due to the view + // requiring a backdrop copy. + void RenderAfterBackdropCopy(); + void RestoreDeviceState(); + + // The size and render target cannot be read until the view has finished + // building, since we try to right-size the render target to the visible + // region. + MLGRenderTarget* GetRenderTarget() const; + gfx::IntSize GetSize() const; + + gfx::IntRect GetInvalidRect() const { return mInvalidBounds; } + + private: + RenderViewMLGPU(FrameBuilder* aBuilder, RenderViewMLGPU* aParent); + ~RenderViewMLGPU(); + + void ExecuteRendering(); + bool UpdateVisibleRegion(ItemInfo& aItem); + void AddItemFrontToBack(LayerMLGPU* aLayer, ItemInfo& aItem); + void AddItemBackToFront(LayerMLGPU* aLayer, ItemInfo& aItem); + + void PrepareClears(); + void SetDeviceState(); + void SetDepthTestMode(MLGDepthTestMode aMode); + + void ExecutePass(RenderPassMLGPU* aPass); + + // Return the sorting index offset to use. 
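+ // This may also flag the depth buffer for a clear, if earlier frames exhausted the available sort indices.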
+ int32_t PrepareDepthBuffer(); + + private: + std::deque<RefPtr<RenderPassMLGPU>> mFrontToBack; + std::deque<RefPtr<RenderPassMLGPU>> mBackToFront; + + FrameBuilder* mBuilder; + RefPtr<MLGDevice> mDevice; + RenderViewMLGPU* mParent; + std::vector<RefPtr<RenderViewMLGPU>> mChildren; + + // Shader data. + ConstantBufferSection mWorldConstants; + + // Information for the initial target surface clear. This covers the area that + // won't be occluded by opaque content. + ClearRegionHelper mPreClear; + + // The post-clear region, that must be cleared after all drawing is done. + nsIntRegion mPostClearRegion; + ClearRegionHelper mPostClear; + + // Either an MLGSwapChain-derived render target, or an intermediate surface. + RefPtr<MLGRenderTarget> mTarget; + + // For intermediate render targets only, this is the layer owning the render + // target. + ContainerLayerMLGPU* mContainer; + + // The offset adjustment from container layer space to render target space. + // This is 0,0 for the root view. + gfx::IntPoint mTargetOffset; + + // The invalid bounds as computed by LayerTreeInvalidation. This is the + // initial render bounds size, if invalidation is disabled. + gfx::IntRect mInvalidBounds; + + // The occluded region, which is updated every time we process an opaque, + // rectangular item. This is not actually in LayerPixels, we do this to + // avoid FromUnknownRegion which has array copies. + LayerIntRegion mOccludedRegion; + + // True if we've finished adding layers to the view. + bool mFinishedBuilding; + + // This state is used to avoid changing buffers while we execute batches. + size_t mCurrentLayerBufferIndex; + size_t mCurrentMaskRectBufferIndex; + + // This state is saved locally so it can be restored in RestoreDeviceState. + MLGDepthTestMode mCurrentDepthMode; + + // Depth-buffer tracking. + int32_t mNextSortIndex; + bool mUseDepthBuffer; + bool mDepthBufferNeedsClear; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_RenderViewMLGPU_h diff --git a/gfx/layers/mlgpu/ShaderDefinitionsMLGPU-inl.h b/gfx/layers/mlgpu/ShaderDefinitionsMLGPU-inl.h new file mode 100644 index 0000000000..9197f7e870 --- /dev/null +++ b/gfx/layers/mlgpu/ShaderDefinitionsMLGPU-inl.h @@ -0,0 +1,79 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef _include_gfx_layers_mlgpu_ShaderDefinitions_inl_h +#define _include_gfx_layers_mlgpu_ShaderDefinitions_inl_h + +namespace mozilla { +namespace layers { +namespace mlg { + +inline const Maybe<gfx::Polygon>& SimpleTraits::geometry() const { + return mItem.geometry; +} + +inline nsTArray<gfx::Triangle> SimpleTraits::GenerateTriangles( + const gfx::Polygon& aPolygon) const { + return aPolygon.ToTriangles(); +} + +inline SimpleTraits::TriangleVertices SimpleTraits::MakeVertex( + const FirstTriangle& aIgnore) const { + TriangleVertices v = {mRect.BottomLeft(), mRect.TopLeft(), mRect.TopRight(), + mItem.layerIndex, mItem.sortOrder}; + return v; +} + +inline SimpleTraits::TriangleVertices SimpleTraits::MakeVertex( + const SecondTriangle& aIgnore) const { + TriangleVertices v = {mRect.TopRight(), mRect.BottomRight(), + mRect.BottomLeft(), mItem.layerIndex, mItem.sortOrder}; + return v; +} + +inline SimpleTraits::TriangleVertices SimpleTraits::MakeVertex( + const gfx::Triangle& aTriangle) const { + TriangleVertices v = {aTriangle.p1, aTriangle.p2, aTriangle.p3, + mItem.layerIndex, mItem.sortOrder}; + return v; +} + +inline SimpleTraits::UnitQuadVertex SimpleTraits::MakeUnitQuadVertex() const { + UnitQuadVertex v = {mRect, mItem.layerIndex, mItem.sortOrder}; + return v; +} + +inline nsTArray<gfx::TexturedTriangle> TexturedTraits::GenerateTriangles( + const gfx::Polygon& aPolygon) const { + return GenerateTexturedTriangles(aPolygon, mRect, mTexCoords); +} + +inline TexturedTraits::VertexData TexturedTraits::MakeVertexData( + const FirstTriangle& aIgnore) const { + VertexData v = {mTexCoords.BottomLeft(), mTexCoords.TopLeft(), + mTexCoords.TopRight()}; + return v; +} + +inline TexturedTraits::VertexData TexturedTraits::MakeVertexData( + const SecondTriangle& aIgnore) const { + VertexData v = {mTexCoords.TopRight(), mTexCoords.BottomRight(), + mTexCoords.BottomLeft()}; + return v; +} + +inline TexturedTraits::VertexData TexturedTraits::MakeVertexData( + const gfx::TexturedTriangle& aTriangle) const { + VertexData v = {aTriangle.textureCoords.p1, aTriangle.textureCoords.p2, + aTriangle.textureCoords.p3}; + return v; +} + +} // namespace mlg +} // namespace layers +} // namespace mozilla + +#endif // _include_gfx_layers_mlgpu_ShaderDefinitions_inl_h diff --git a/gfx/layers/mlgpu/ShaderDefinitionsMLGPU.h b/gfx/layers/mlgpu/ShaderDefinitionsMLGPU.h new file mode 100644 index 0000000000..9c3d490a28 --- /dev/null +++ b/gfx/layers/mlgpu/ShaderDefinitionsMLGPU.h @@ -0,0 +1,195 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MOZILLA_GFX_SHADERDEFINITIONSMLGPU_H +#define MOZILLA_GFX_SHADERDEFINITIONSMLGPU_H + +#include "mozilla/gfx/Point.h" +#include "mozilla/gfx/Triangle.h" +#include "mozilla/gfx/Types.h" +#include "mozilla/layers/LayersHelpers.h" +#include "nsTArray.h" + +namespace mozilla { +namespace layers { + +struct ItemInfo; +class ShaderRenderPass; + +namespace mlg { + +// These may need to move into run-time values determined by MLGDevice. +static const size_t kConstantBufferElementSize = 16; +static const size_t kMaxConstantBufferSize = 4096 * kConstantBufferElementSize; + +// Vertex shader slots. 
We reserve the first two slots across all shaders, +// and keep the first three slots free across all RenderPass shaders, for +// uniformity. +static const uint32_t kWorldConstantBufferSlot = 0; +static const uint32_t kLayerBufferSlot = 1; +static const uint32_t kMaskBufferSlot = 3; +static const uint32_t kBlendConstantBufferSlot = 4; +static const uint32_t kClearConstantBufferSlot = 2; + +// This is specified in common-ps.hlsl. +static const uint32_t kMaskLayerTextureSlot = 4; +static const uint32_t kDefaultSamplerSlot = 0; +static const uint32_t kMaskSamplerSlot = 1; + +// These are the maximum slot numbers we bind. We assert that no binding +// happens above the max slot, since we try to clear buffer bindings at +// the end of each frame. +static const uint32_t kMaxVertexShaderConstantBuffers = 5; +static const uint32_t kMaxPixelShaderConstantBuffers = 3; + +// Maximum depth in the depth buffer. This must match common-vs.hlsl. +static const int32_t kDepthLimit = 1000000; + +struct WorldConstants { + float projection[4][4]; + gfx::Point targetOffset; + int sortIndexOffset; + unsigned debugFrameNumber; +}; + +struct ClearConstants { + explicit ClearConstants(int aDepth) : depth(aDepth) {} + int depth; + int padding[3]; +}; + +struct LayerConstants { + float transform[4][4]; + gfx::Rect clipRect; + uint32_t maskIndex; + uint32_t padding[3]; +}; + +struct MaskCombineInput { + float texCoords[4]; +}; + +struct MaskInformation { + MaskInformation(float aOpacity, bool aHasMask) + : opacity(aOpacity), hasMask(aHasMask ? 1 : 0) {} + float opacity; + uint32_t hasMask; + uint32_t padding[2]; +}; + +struct YCbCrShaderConstants { + float yuvColorMatrix[3][4]; +}; + +struct YCbCrColorDepthConstants { + float coefficient; + uint32_t padding[3]; +}; + +struct BlendVertexShaderConstants { + float backdropTransform[4][4]; +}; + +template <typename T> +static inline nsTArray<gfx::IntRect> ToRectArray(const T& aRegion) { + nsTArray<gfx::IntRect> rects; + for (auto iter = aRegion.RectIter(); !iter.Done(); iter.Next()) { + rects.AppendElement(iter.Get().ToUnknownRect()); + } + return rects; +} + +struct SimpleTraits { + SimpleTraits(const ItemInfo& aItem, const gfx::Rect& aRect) + : mItem(aItem), mRect(aRect) {} + + // Helper nonce structs so functions can break vertex data up by each + // triangle in a quad, or return vertex info for a unit quad. + struct AnyTriangle {}; + struct FirstTriangle : AnyTriangle {}; + struct SecondTriangle : AnyTriangle {}; + struct UnitQuad {}; + + // This is the base vertex layout used by all unit quad shaders. + struct UnitQuadVertex { + gfx::Rect rect; + uint32_t layerIndex; + int depth; + }; + + // This is the base vertex layout used by all unit triangle shaders. + struct TriangleVertices { + gfx::Point p1, p2, p3; + uint32_t layerIndex; + int depth; + }; + + // Helper functions for populating a TriangleVertices. The first two use mRect + // to generate triangles, the third function uses coordinates from an already + // computed triangle. + TriangleVertices MakeVertex(const FirstTriangle& aIgnore) const; + TriangleVertices MakeVertex(const SecondTriangle& aIgnore) const; + TriangleVertices MakeVertex(const gfx::Triangle& aTriangle) const; + + UnitQuadVertex MakeUnitQuadVertex() const; + + // This default GenerateTriangles only computes the 3 points of each triangle + // in the polygon. If needed, shaders can override this and return a more + // complex triangle, to encode dependent information in extended vertex data. + // + // AddShaderVertices will deduce this return type.
It should be an nsTArray<T> + // where T inherits from Triangle. + nsTArray<gfx::Triangle> GenerateTriangles(const gfx::Polygon& aPolygon) const; + + // Accessors. + const Maybe<gfx::Polygon>& geometry() const; + const gfx::Rect& rect() const { return mRect; } + + const ItemInfo& mItem; + gfx::Rect mRect; +}; + +struct ColorTraits : public SimpleTraits { + ColorTraits(const ItemInfo& aItem, const gfx::Rect& aRect, + const gfx::DeviceColor& aColor) + : SimpleTraits(aItem, aRect), mColor(aColor) {} + + // Color data is the same across all vertex types. + template <typename VertexType> + const gfx::DeviceColor& MakeVertexData(const VertexType& aIgnore) const { + return mColor; + } + + gfx::DeviceColor mColor; +}; + +struct TexturedTraits : public SimpleTraits { + TexturedTraits(const ItemInfo& aItem, const gfx::Rect& aRect, + const gfx::Rect& aTexCoords) + : SimpleTraits(aItem, aRect), mTexCoords(aTexCoords) {} + + // Textured triangles need to compute a texture coordinate for each vertex. + nsTArray<gfx::TexturedTriangle> GenerateTriangles( + const gfx::Polygon& aPolygon) const; + + struct VertexData { + gfx::Point p1, p2, p3; + }; + VertexData MakeVertexData(const FirstTriangle& aIgnore) const; + VertexData MakeVertexData(const SecondTriangle& aIgnore) const; + VertexData MakeVertexData(const gfx::TexturedTriangle& aTriangle) const; + const gfx::Rect& MakeVertexData(const UnitQuad& aIgnore) const { + return mTexCoords; + } + + gfx::Rect mTexCoords; +}; + +} // namespace mlg +} // namespace layers +} // namespace mozilla + +#endif diff --git a/gfx/layers/mlgpu/SharedBufferMLGPU.cpp b/gfx/layers/mlgpu/SharedBufferMLGPU.cpp new file mode 100644 index 0000000000..b6c9978c80 --- /dev/null +++ b/gfx/layers/mlgpu/SharedBufferMLGPU.cpp @@ -0,0 +1,275 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "SharedBufferMLGPU.h" +#include "BufferCache.h" +#include "MLGDevice.h" + +namespace mozilla { +namespace layers { + +SharedBufferMLGPU::SharedBufferMLGPU(MLGDevice* aDevice, MLGBufferType aType, + size_t aDefaultSize) + : mDevice(aDevice), + mType(aType), + mDefaultSize(aDefaultSize), + mCanUseOffsetAllocation(true), + mCurrentPosition(0), + mMaxSize(0), + mMap(), + mMapped(false), + mBytesUsedThisFrame(0), + mNumSmallFrames(0) { + MOZ_COUNT_CTOR(SharedBufferMLGPU); +} + +SharedBufferMLGPU::~SharedBufferMLGPU() { + MOZ_COUNT_DTOR(SharedBufferMLGPU); + Unmap(); +} + +bool SharedBufferMLGPU::Init() { + // If we can't use buffer offset binding, we never allocated shared buffers. + if (!mCanUseOffsetAllocation) { + return true; + } + + // If we can use offset binding, allocate an initial shared buffer now. + if (!GrowBuffer(mDefaultSize)) { + return false; + } + return true; +} + +void SharedBufferMLGPU::Reset() { + // We shouldn't be mapped here, but just in case, unmap now. + Unmap(); + mBytesUsedThisFrame = 0; + + // If we allocated a large buffer for a particularly heavy layer tree, + // but have not used most of the buffer again for many frames, we + // discard the buffer. This is to prevent having to perform large + // pointless uploads after visiting a single havy page - it also + // lessens ping-ponging between large and small buffers. 
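+ // A "small" frame is one that used at most mDefaultSize bytes (see PrepareForUsage); after ten such frames in a row, buffers that grew past four times the default size are released.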
+ if (mBuffer && (mBuffer->GetSize() > mDefaultSize * 4) && + mNumSmallFrames >= 10) { + mBuffer = nullptr; + } + + // Note that we do not aggressively map a new buffer. There's no reason to, + // and it'd cause unnecessary uploads when painting empty frames. +} + +bool SharedBufferMLGPU::EnsureMappedBuffer(size_t aBytes) { + if (!mBuffer || (mMaxSize - mCurrentPosition < aBytes)) { + if (!GrowBuffer(aBytes)) { + return false; + } + } + if (!mMapped && !Map()) { + return false; + } + return true; +} + +// We don't want to cache large buffers, since it results in larger uploads +// that might not be needed. +static const size_t kMaxCachedBufferSize = 128 * 1024; + +bool SharedBufferMLGPU::GrowBuffer(size_t aBytes) { + // We only pre-allocate buffers if we can use offset allocation. + MOZ_ASSERT(mCanUseOffsetAllocation); + + // Unmap the previous buffer. This will retain mBuffer, but free up the + // address space used by its mapping. + Unmap(); + + size_t maybeSize = mDefaultSize; + if (mBuffer) { + // Try to first grow the previous allocation size. + maybeSize = std::min(kMaxCachedBufferSize, mBuffer->GetSize() * 2); + } + + size_t bytes = std::max(aBytes, maybeSize); + mBuffer = mDevice->CreateBuffer(mType, bytes, MLGUsage::Dynamic); + if (!mBuffer) { + return false; + } + + mCurrentPosition = 0; + mMaxSize = mBuffer->GetSize(); + return true; +} + +void SharedBufferMLGPU::PrepareForUsage() { + Unmap(); + + if (mBytesUsedThisFrame <= mDefaultSize) { + mNumSmallFrames++; + } else { + mNumSmallFrames = 0; + } +} + +bool SharedBufferMLGPU::Map() { + MOZ_ASSERT(mBuffer); + MOZ_ASSERT(!mMapped); + + if (!mDevice->Map(mBuffer, MLGMapType::WRITE_DISCARD, &mMap)) { + // Don't retain the buffer, it's useless if we can't map it. + mBuffer = nullptr; + return false; + } + + mCurrentPosition = 0; + mMapped = true; + return true; +} + +void SharedBufferMLGPU::Unmap() { + if (!mMapped) { + return; + } + + mBytesUsedThisFrame += mCurrentPosition; + + mDevice->Unmap(mBuffer); + mMap = MLGMappedResource(); + mMapped = false; +} + +uint8_t* SharedBufferMLGPU::GetBufferPointer(size_t aBytes, + ptrdiff_t* aOutOffset, + RefPtr<MLGBuffer>* aOutBuffer) { + if (!EnsureMappedBuffer(aBytes)) { + return nullptr; + } + + ptrdiff_t newPos = mCurrentPosition + aBytes; + MOZ_ASSERT(size_t(newPos) <= mMaxSize); + + *aOutOffset = mCurrentPosition; + *aOutBuffer = mBuffer; + + uint8_t* ptr = reinterpret_cast<uint8_t*>(mMap.mData) + mCurrentPosition; + mCurrentPosition = newPos; + return ptr; +} + +VertexBufferSection::VertexBufferSection() + : mOffset(-1), mNumVertices(0), mStride(0) {} + +void VertexBufferSection::Init(MLGBuffer* aBuffer, ptrdiff_t aOffset, + size_t aNumVertices, size_t aStride) { + mBuffer = aBuffer; + mOffset = aOffset; + mNumVertices = aNumVertices; + mStride = aStride; +} + +ConstantBufferSection::ConstantBufferSection() + : mOffset(-1), mNumBytes(0), mNumItems(0) {} + +void ConstantBufferSection::Init(MLGBuffer* aBuffer, ptrdiff_t aOffset, + size_t aBytes, size_t aNumItems) { + mBuffer = aBuffer; + mOffset = aOffset; + mNumBytes = aBytes; + mNumItems = aNumItems; +} + +SharedVertexBuffer::SharedVertexBuffer(MLGDevice* aDevice, size_t aDefaultSize) + : SharedBufferMLGPU(aDevice, MLGBufferType::Vertex, aDefaultSize) {} + +bool SharedVertexBuffer::Allocate(VertexBufferSection* aHolder, + size_t aNumItems, size_t aSizeOfItem, + const void* aData) { + RefPtr<MLGBuffer> buffer; + ptrdiff_t offset; + size_t bytes = aSizeOfItem * aNumItems; + uint8_t* ptr = GetBufferPointer(bytes, &offset, &buffer); + if 
(!ptr) { + return false; + } + + memcpy(ptr, aData, bytes); + aHolder->Init(buffer, offset, aNumItems, aSizeOfItem); + return true; +} + +AutoBufferUploadBase::AutoBufferUploadBase() : mPtr(nullptr) {} + +AutoBufferUploadBase::~AutoBufferUploadBase() { + if (mBuffer) { + UnmapBuffer(); + } +} + +void AutoBufferUploadBase::Init(void* aPtr, MLGDevice* aDevice, + MLGBuffer* aBuffer) { + MOZ_ASSERT(!mPtr && aPtr); + mPtr = aPtr; + mDevice = aDevice; + mBuffer = aBuffer; +} + +SharedConstantBuffer::SharedConstantBuffer(MLGDevice* aDevice, + size_t aDefaultSize) + : SharedBufferMLGPU(aDevice, MLGBufferType::Constant, aDefaultSize) { + mMaxConstantBufferBindSize = aDevice->GetMaxConstantBufferBindSize(); + mCanUseOffsetAllocation = aDevice->CanUseConstantBufferOffsetBinding(); +} + +bool SharedConstantBuffer::Allocate(ConstantBufferSection* aHolder, + AutoBufferUploadBase* aPtr, + size_t aNumItems, size_t aSizeOfItem) { + MOZ_ASSERT(aSizeOfItem % 16 == 0, "Items must be padded to 16 bytes"); + + size_t bytes = aNumItems * aSizeOfItem; + if (bytes > mMaxConstantBufferBindSize) { + gfxWarning() + << "Attempted to allocate too many bytes into a constant buffer"; + return false; + } + + RefPtr<MLGBuffer> buffer; + ptrdiff_t offset; + if (!GetBufferPointer(aPtr, bytes, &offset, &buffer)) { + return false; + } + + aHolder->Init(buffer, offset, bytes, aNumItems); + return true; +} + +uint8_t* SharedConstantBuffer::AllocateNewBuffer( + size_t aBytes, ptrdiff_t* aOutOffset, RefPtr<MLGBuffer>* aOutBuffer) { + RefPtr<MLGBuffer> buffer; + if (BufferCache* cache = mDevice->GetConstantBufferCache()) { + buffer = cache->GetOrCreateBuffer(aBytes); + } else { + buffer = mDevice->CreateBuffer(MLGBufferType::Constant, aBytes, + MLGUsage::Dynamic); + } + if (!buffer) { + return nullptr; + } + + MLGMappedResource map; + if (!mDevice->Map(buffer, MLGMapType::WRITE_DISCARD, &map)) { + return nullptr; + } + + // Signal that offsetting is not supported. + *aOutOffset = -1; + *aOutBuffer = buffer; + return reinterpret_cast<uint8_t*>(map.mData); +} + +void AutoBufferUploadBase::UnmapBuffer() { mDevice->Unmap(mBuffer); } + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/SharedBufferMLGPU.h b/gfx/layers/mlgpu/SharedBufferMLGPU.h new file mode 100644 index 0000000000..19ece7ff11 --- /dev/null +++ b/gfx/layers/mlgpu/SharedBufferMLGPU.h @@ -0,0 +1,273 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_SharedBufferMLGPU_h +#define mozilla_gfx_layers_mlgpu_SharedBufferMLGPU_h + +#include "ShaderDefinitionsMLGPU.h" +#include "MLGDevice.h" +#include "MLGDeviceTypes.h" +#include "StagingBuffer.h" +#include "mozilla/gfx/Logging.h" + +namespace mozilla { +namespace layers { + +class MLGBuffer; + +class SharedBufferMLGPU { + public: + virtual ~SharedBufferMLGPU(); + + bool Init(); + + // Call before starting a new frame. + void Reset(); + + // Call to finish any pending uploads. 
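+ // This unmaps the buffer and updates the small-frame heuristic used by Reset().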
+ void PrepareForUsage(); + + protected: + SharedBufferMLGPU(MLGDevice* aDevice, MLGBufferType aType, + size_t aDefaultSize); + + bool EnsureMappedBuffer(size_t aBytes); + bool GrowBuffer(size_t aBytes); + void ForgetBuffer(); + bool Map(); + void Unmap(); + + uint8_t* GetBufferPointer(size_t aBytes, ptrdiff_t* aOutOffset, + RefPtr<MLGBuffer>* aOutBuffer); + + protected: + // Note: RefPtr here would cause a cycle. Only MLGDevice should own + // SharedBufferMLGPU objects for now. + MLGDevice* mDevice; + MLGBufferType mType; + size_t mDefaultSize; + bool mCanUseOffsetAllocation; + + // When |mBuffer| is non-null, mMaxSize is the buffer size. If mapped, the + // position is between 0 and mMaxSize, otherwise it is always 0. + RefPtr<MLGBuffer> mBuffer; + ptrdiff_t mCurrentPosition; + size_t mMaxSize; + + MLGMappedResource mMap; + bool mMapped; + + // These are used to track how many frames come in under the default + // buffer size in a row. + size_t mBytesUsedThisFrame; + size_t mNumSmallFrames; +}; + +class VertexBufferSection final { + friend class SharedVertexBuffer; + + public: + VertexBufferSection(); + + uint32_t Stride() const { return mStride; } + MLGBuffer* GetBuffer() const { return mBuffer; } + ptrdiff_t Offset() const { + MOZ_ASSERT(IsValid()); + return mOffset; + } + size_t NumVertices() const { return mNumVertices; } + bool IsValid() const { return !!mBuffer; } + + protected: + void Init(MLGBuffer* aBuffer, ptrdiff_t aOffset, size_t aNumVertices, + size_t aStride); + + protected: + RefPtr<MLGBuffer> mBuffer; + ptrdiff_t mOffset; + size_t mNumVertices; + size_t mStride; +}; + +class ConstantBufferSection final { + friend class SharedConstantBuffer; + + public: + ConstantBufferSection(); + + uint32_t NumConstants() const { return NumConstantsForBytes(mNumBytes); } + size_t NumItems() const { return mNumItems; } + uint32_t Offset() const { + MOZ_ASSERT(IsValid()); + return mOffset / 16; + } + MLGBuffer* GetBuffer() const { return mBuffer; } + bool IsValid() const { return !!mBuffer; } + bool HasOffset() const { return mOffset != -1; } + + protected: + static constexpr size_t NumConstantsForBytes(size_t aBytes) { + return (aBytes + ((256 - (aBytes % 256)) % 256)) / 16; + } + + void Init(MLGBuffer* aBuffer, ptrdiff_t aOffset, size_t aBytes, + size_t aNumItems); + + protected: + RefPtr<MLGBuffer> mBuffer; + ptrdiff_t mOffset; + size_t mNumBytes; + size_t mNumItems; +}; + +// Vertex buffers don't need special alignment. +typedef StagingBuffer<0> VertexStagingBuffer; + +class SharedVertexBuffer final : public SharedBufferMLGPU { + public: + SharedVertexBuffer(MLGDevice* aDevice, size_t aDefaultSize); + + // Allocate a buffer that can be uploaded immediately. + bool Allocate(VertexBufferSection* aHolder, + const VertexStagingBuffer& aStaging) { + return Allocate(aHolder, aStaging.NumItems(), aStaging.SizeOfItem(), + aStaging.GetBufferStart()); + } + + // Allocate a buffer that can be uploaded immediately. This is the + // direct access version, for cases where a StagingBuffer is not + // needed. + bool Allocate(VertexBufferSection* aHolder, size_t aNumItems, + size_t aSizeOfItem, const void* aData); + + template <typename T> + bool Allocate(VertexBufferSection* aHolder, const T& aItem) { + return Allocate(aHolder, 1, sizeof(T), &aItem); + } +}; + +// To support older Direct3D versions, we need to support one-off MLGBuffers, +// where data is uploaded immediately rather than at the end of all batch +// preparation. We achieve this through a small helper class. 
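The helper class that follows exposes raw mapped memory through a typed pointer. As a rough standalone sketch of that pattern only (LayerConstants, TypedUpload, and the storage buffer are hypothetical stand-ins, not the MLGPU API), the idea looks like this; the real AutoBufferUpload additionally remembers the device and buffer so its destructor can unmap one-off allocations.

#include <cassert>
#include <new>

struct LayerConstants {      // hypothetical stand-in for a shader constant struct
  float color[4];
  float opacity;
  float padding[3];          // keep the size a multiple of 16 bytes
};
static_assert(sizeof(LayerConstants) % 16 == 0, "must be 16-byte padded");

template <typename T>
class TypedUpload {          // plays the role of AutoBufferUpload<T>
 public:
  explicit TypedUpload(void* aMappedPtr) : mPtr(aMappedPtr) {}
  T* operator->() const { return reinterpret_cast<T*>(mPtr); }

 private:
  void* mPtr;
};

int main() {
  alignas(16) unsigned char storage[sizeof(LayerConstants)];  // pretend this is mapped GPU memory
  new (storage) LayerConstants();  // the real code simply writes over the raw mapped bytes
  TypedUpload<LayerConstants> upload(storage);
  upload->opacity = 1.0f;          // write a field straight into the "mapped" block
  assert(upload->opacity == 1.0f);
  return 0;
}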
+//
+// Note: the unmap is not inline since we don't include MLGDevice.h.
+class MOZ_STACK_CLASS AutoBufferUploadBase {
+ public:
+  AutoBufferUploadBase();
+  ~AutoBufferUploadBase();
+
+  void Init(void* aPtr) {
+    MOZ_ASSERT(!mPtr && aPtr);
+    mPtr = aPtr;
+  }
+  void Init(void* aPtr, MLGDevice* aDevice, MLGBuffer* aBuffer);
+  void* get() { return const_cast<void*>(mPtr); }
+
+ private:
+  void UnmapBuffer();
+
+ protected:
+  RefPtr<MLGDevice> mDevice;
+  RefPtr<MLGBuffer> mBuffer;
+  void* mPtr;
+};
+
+// This is a typed helper for AutoBufferUploadBase.
+template <typename T>
+class AutoBufferUpload : public AutoBufferUploadBase {
+ public:
+  AutoBufferUpload() = default;
+
+  T* operator->() const { return reinterpret_cast<T*>(mPtr); }
+};
+
+class SharedConstantBuffer final : public SharedBufferMLGPU {
+ public:
+  SharedConstantBuffer(MLGDevice* aDevice, size_t aDefaultSize);
+
+  // Allocate a buffer that can be immediately uploaded.
+  bool Allocate(ConstantBufferSection* aHolder,
+                const ConstantStagingBuffer& aStaging) {
+    MOZ_ASSERT(aStaging.NumItems() * aStaging.SizeOfItem() ==
+               aStaging.NumBytes());
+    return Allocate(aHolder, aStaging.NumItems(), aStaging.SizeOfItem(),
+                    aStaging.GetBufferStart());
+  }
+
+  // Allocate a buffer of one item that can be immediately uploaded.
+  template <typename T>
+  bool Allocate(ConstantBufferSection* aHolder, const T& aItem) {
+    return Allocate(aHolder, 1, sizeof(aItem), &aItem);
+  }
+
+  // Allocate a buffer of N items that can be immediately uploaded.
+  template <typename T>
+  bool Allocate(ConstantBufferSection* aHolder, const T* aItems,
+                size_t aNumItems) {
+    return Allocate(aHolder, aNumItems, sizeof(T), aItems);
+  }
+
+  // Allocate a buffer that is uploaded after the caller has finished writing
+  // to it. This method should generally not be used unless copying T
+  // is expensive, since the default immediate-upload version has an implicit
+  // extra copy to the GPU. This version exposes the mapped memory directly.
+  template <typename T>
+  bool Allocate(ConstantBufferSection* aHolder, AutoBufferUpload<T>* aPtr) {
+    MOZ_ASSERT(sizeof(T) % 16 == 0, "Items must be padded to 16 bytes");
+
+    return Allocate(aHolder, aPtr, 1, sizeof(T));
+  }
+
+ private:
+  bool Allocate(ConstantBufferSection* aHolder, size_t aNumItems,
+                size_t aSizeOfItem, const void* aData) {
+    AutoBufferUploadBase ptr;
+    if (!Allocate(aHolder, &ptr, aNumItems, aSizeOfItem)) {
+      return false;
+    }
+    memcpy(ptr.get(), aData, aNumItems * aSizeOfItem);
+    return true;
+  }
+
+  bool Allocate(ConstantBufferSection* aHolder, AutoBufferUploadBase* aPtr,
+                size_t aNumItems, size_t aSizeOfItem);
+
+  bool GetBufferPointer(AutoBufferUploadBase* aPtr, size_t aBytes,
+                        ptrdiff_t* aOutOffset, RefPtr<MLGBuffer>* aOutBuffer) {
+    if (!mCanUseOffsetAllocation) {
+      uint8_t* ptr = AllocateNewBuffer(aBytes, aOutOffset, aOutBuffer);
+      if (!ptr) {
+        return false;
+      }
+      aPtr->Init(ptr, mDevice, *aOutBuffer);
+      return true;
+    }
+
+    // Align up the allocation to 256 bytes, since D3D11 requires that
+    // constant buffers start at multiples of 16 elements.
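A quick worked version of that alignment rule, as a standalone snippet that mirrors AlignUp<256> (the AlignUpTo helper is invented for illustration): rounding each allocation up to 256 bytes keeps every section on a 16-constant boundary, which is what the divide-by-16 in ConstantBufferSection::Offset() relies on.

#include <cstddef>

constexpr size_t AlignUpTo(size_t aBytes, size_t aAlignment) {
  return aBytes + ((aAlignment - (aBytes % aAlignment)) % aAlignment);
}

static_assert(AlignUpTo(0, 256) == 0, "already aligned");
static_assert(AlignUpTo(48, 256) == 256, "3 items of 16 bytes round up to 256");
static_assert(AlignUpTo(256, 256) == 256, "exact multiples are unchanged");
static_assert(AlignUpTo(260, 256) == 512, "anything past a boundary takes the next one");

// A 256-byte-aligned byte offset is always a whole number of 16-byte
// constants, which is the unit ConstantBufferSection::Offset() reports.
static_assert(AlignUpTo(260, 256) / 16 == 32, "512 bytes == 32 constants");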
+ size_t alignedBytes = AlignUp<256>::calc(aBytes); + + uint8_t* ptr = SharedBufferMLGPU::GetBufferPointer(alignedBytes, aOutOffset, + aOutBuffer); + if (!ptr) { + return false; + } + + aPtr->Init(ptr); + return true; + } + + uint8_t* AllocateNewBuffer(size_t aBytes, ptrdiff_t* aOutOffset, + RefPtr<MLGBuffer>* aOutBuffer); + + private: + size_t mMaxConstantBufferBindSize; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_SharedBufferMLGPU_h diff --git a/gfx/layers/mlgpu/StagingBuffer.cpp b/gfx/layers/mlgpu/StagingBuffer.cpp new file mode 100644 index 0000000000..d82531be5e --- /dev/null +++ b/gfx/layers/mlgpu/StagingBuffer.cpp @@ -0,0 +1,18 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "StagingBuffer.h" +#include "MLGDevice.h" +#include "ShaderDefinitionsMLGPU.h" + +namespace mozilla { +namespace layers { + +ConstantStagingBuffer::ConstantStagingBuffer(MLGDevice* aDevice) + : StagingBuffer(mlg::kMaxConstantBufferSize) {} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/StagingBuffer.h b/gfx/layers/mlgpu/StagingBuffer.h new file mode 100644 index 0000000000..1bbb0959ba --- /dev/null +++ b/gfx/layers/mlgpu/StagingBuffer.h @@ -0,0 +1,271 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_StagingBuffer_h +#define mozilla_gfx_layers_mlgpu_StagingBuffer_h + +#include "mozilla/MathAlgorithms.h" +#include "mozilla/UniquePtr.h" +#include "UtilityMLGPU.h" +#include <algorithm> +#include <utility> +#include <limits.h> + +namespace mozilla { +namespace layers { + +class MLGDevice; + +// A StagingBuffer is a writable memory buffer for arbitrary contents. +template <size_t Alignment = 0> +class StagingBuffer { + public: + StagingBuffer() : StagingBuffer(0) {} + + // By default, staging buffers operate in "forward" mode: items are added to + // the end of the buffer. In "reverse" mode the cursor is at the end of the + // buffer, and items are added to the beginning. + // + // This must be called before the buffer is written. + void SetReversed() { + MOZ_ASSERT(IsEmpty()); + mReversed = true; + } + + // Write a series of components as a single item. When this is first used, the + // buffer records the initial item size and requires that all future items be + // the exact same size. + // + // This directs to either AppendItem or PrependItem depending on the buffer + // state. + template <typename T> + bool AddItem(const T& aItem) { + if (mReversed) { + return PrependItem(aItem); + } + return AppendItem(aItem); + } + + // Helper for adding a single item as two components. + template <typename T1, typename T2> + bool AddItem(const T1& aItem1, const T2& aItem2) { + if (mReversed) { + return PrependItem(aItem1, aItem2); + } + return AppendItem(aItem1, aItem2); + } + + // This may only be called on forward buffers. 
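Before the real implementation that follows, here is a heavily simplified standalone sketch of the forward append path (MiniStagingBuffer and its members are invented names, and alignment, reverse mode, and growth are omitted): the first item fixes the stride, and every later item must match it, which is the invariant the "item of incorrect size added!" assertion below enforces.

#include <cassert>
#include <cstddef>
#include <vector>

class MiniStagingBuffer {
 public:
  template <typename T>
  bool AppendItem(const T& aItem) {
    if (mStride == 0) {
      mStride = sizeof(T);   // the first item decides the uniform size
    } else if (mStride != sizeof(T)) {
      return false;          // mixed item sizes are a caller bug
    }
    const unsigned char* src = reinterpret_cast<const unsigned char*>(&aItem);
    mBytes.insert(mBytes.end(), src, src + sizeof(T));
    mNumItems++;
    return true;
  }

  size_t NumItems() const { return mNumItems; }
  size_t SizeOfItem() const { return mStride; }
  const unsigned char* GetBufferStart() const { return mBytes.data(); }

 private:
  std::vector<unsigned char> mBytes;
  size_t mStride = 0;
  size_t mNumItems = 0;
};

int main() {
  struct Vertex { float x, y; };
  MiniStagingBuffer staging;
  assert(staging.AppendItem(Vertex{0.0f, 0.0f}));
  assert(staging.AppendItem(Vertex{1.0f, 0.0f}));
  assert(!staging.AppendItem(42));  // an int has a different size, so it is rejected
  assert(staging.NumItems() == 2 && staging.SizeOfItem() == sizeof(Vertex));
  return 0;
}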
+ template <typename T> + bool AppendItem(const T& aItem) { + MOZ_ASSERT(!mReversed); + + size_t alignedBytes = AlignUp<Alignment>::calc(sizeof(aItem)); + if (!mUniformSize) { + mUniformSize = alignedBytes; + } + + if (!EnsureForwardRoomFor(alignedBytes)) { + return false; + } + if (mUniformSize != alignedBytes) { + MOZ_ASSERT_UNREACHABLE("item of incorrect size added!"); + return false; + } + + *reinterpret_cast<T*>(mPos) = aItem; + mPos += alignedBytes; + MOZ_ASSERT(mPos <= mEnd); + + mNumItems++; + return true; + } + + // Append an item in two stages. + template <typename T1, typename T2> + bool AppendItem(const T1& aFirst, const T2& aSecond) { + struct Combined { + T1 first; + T2 second; + } value = {aFirst, aSecond}; + + // The combined value must be packed. + static_assert(sizeof(value) == sizeof(aFirst) + sizeof(aSecond), + "Items must be packed within struct"); + return AppendItem(value); + } + + // This may only be called on reversed buffers. + template <typename T> + bool PrependItem(const T& aItem) { + MOZ_ASSERT(mReversed); + + size_t alignedBytes = AlignUp<Alignment>::calc(sizeof(aItem)); + if (!mUniformSize) { + mUniformSize = alignedBytes; + } + + if (!EnsureBackwardRoomFor(alignedBytes)) { + return false; + } + if (mUniformSize != alignedBytes) { + MOZ_ASSERT_UNREACHABLE("item of incorrect size added!"); + return false; + } + + mPos -= alignedBytes; + *reinterpret_cast<T*>(mPos) = aItem; + MOZ_ASSERT(mPos >= mBuffer.get()); + + mNumItems++; + return true; + } + + // Prepend an item in two stages. + template <typename T1, typename T2> + bool PrependItem(const T1& aFirst, const T2& aSecond) { + struct Combined { + T1 first; + T2 second; + } value = {aFirst, aSecond}; + + // The combined value must be packed. + static_assert(sizeof(value) == sizeof(aFirst) + sizeof(aSecond), + "Items must be packed within struct"); + return PrependItem(value); + } + + size_t NumBytes() const { + return mReversed ? mEnd - mPos : mPos - mBuffer.get(); + } + uint8_t* GetBufferStart() const { return mReversed ? mPos : mBuffer.get(); } + size_t SizeOfItem() const { return mUniformSize; } + size_t NumItems() const { return mNumItems; } + + void Reset() { + mPos = mReversed ? mEnd : mBuffer.get(); + mUniformSize = 0; + mNumItems = 0; + } + + // RestorePosition must only be called with a previous call to + // GetPosition. + typedef std::pair<size_t, size_t> Position; + Position GetPosition() const { return std::make_pair(NumBytes(), mNumItems); } + void RestorePosition(const Position& aPosition) { + mPos = mBuffer.get() + aPosition.first; + mNumItems = aPosition.second; + if (mNumItems == 0) { + mUniformSize = 0; + } + + // Make sure the buffer is still coherent. 
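The GrowBuffer method a little further below takes the larger of exact fit and 1.5x the old size, clamps to the optional maximum, and rejects overflow. A standalone sketch of just that size computation, with a hypothetical ComputeNewSize helper and concrete numbers:

#include <algorithm>
#include <cassert>
#include <cstddef>

size_t ComputeNewSize(size_t aOldSize, size_t aNeededBytes, size_t aMaxSize) {
  size_t trySize = std::max(aOldSize + aNeededBytes, aOldSize + aOldSize / 2);
  size_t newSize = aMaxSize ? std::min(trySize, aMaxSize) : trySize;
  if (newSize < aOldSize || newSize - aOldSize < aNeededBytes) {
    return 0;  // wrapped around size_t, or the clamp left too little room
  }
  return newSize;
}

int main() {
  assert(ComputeNewSize(64, 8, 0) == 96);      // 1.5x growth wins over exact fit
  assert(ComputeNewSize(64, 200, 0) == 264);   // exact fit wins for large requests
  assert(ComputeNewSize(64, 200, 128) == 0);   // a clamped maximum cannot hold the request
  return 0;
}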
+ MOZ_ASSERT(mPos >= mBuffer.get() && mPos <= mEnd); + MOZ_ASSERT(mNumItems * mUniformSize == NumBytes()); + } + + bool IsEmpty() const { return mNumItems == 0; } + + protected: + explicit StagingBuffer(size_t aMaxSize) + : mPos(nullptr), + mEnd(nullptr), + mUniformSize(0), + mNumItems(0), + mMaxSize(aMaxSize), + mReversed(false) {} + + static const size_t kDefaultSize = 8; + + bool EnsureForwardRoomFor(size_t aAlignedBytes) { + if (size_t(mEnd - mPos) < aAlignedBytes) { + return GrowBuffer(aAlignedBytes); + } + return true; + } + + bool EnsureBackwardRoomFor(size_t aAlignedBytes) { + if (size_t(mPos - mBuffer.get()) < aAlignedBytes) { + return GrowBuffer(aAlignedBytes); + } + return true; + } + + bool GrowBuffer(size_t aAlignedBytes) { + // We should not be writing items that are potentially bigger than the + // maximum constant buffer size, that's crazy. An assert should be good + // enough since the size of writes is static - and shader compilers + // would explode anyway. + MOZ_ASSERT_IF(mMaxSize, aAlignedBytes < mMaxSize); + MOZ_ASSERT_IF(mMaxSize, kDefaultSize * Alignment < mMaxSize); + + if (!mBuffer) { + size_t newSize = std::max(kDefaultSize * Alignment, aAlignedBytes); + MOZ_ASSERT_IF(mMaxSize, newSize < mMaxSize); + + mBuffer = MakeUnique<uint8_t[]>(newSize); + mEnd = mBuffer.get() + newSize; + mPos = mReversed ? mEnd : mBuffer.get(); + return true; + } + + // Take the bigger of exact-fit or 1.5x the previous size, and make sure + // the new size doesn't overflow size_t. If needed, clamp to the max + // size. + size_t oldSize = mEnd - mBuffer.get(); + size_t trySize = std::max(oldSize + aAlignedBytes, oldSize + oldSize / 2); + size_t newSize = mMaxSize ? std::min(trySize, mMaxSize) : trySize; + if (newSize < oldSize || newSize - oldSize < aAlignedBytes) { + return false; + } + + UniquePtr<uint8_t[]> newBuffer = MakeUnique<uint8_t[]>(newSize); + if (!newBuffer) { + return false; + } + + // When the buffer is in reverse mode, we have to copy from the end of the + // buffer, not the beginning. + if (mReversed) { + size_t usedBytes = mEnd - mPos; + size_t newPos = newSize - usedBytes; + MOZ_RELEASE_ASSERT(newPos + usedBytes <= newSize); + + memcpy(newBuffer.get() + newPos, mPos, usedBytes); + mPos = newBuffer.get() + newPos; + } else { + size_t usedBytes = mPos - mBuffer.get(); + MOZ_RELEASE_ASSERT(usedBytes <= newSize); + + memcpy(newBuffer.get(), mBuffer.get(), usedBytes); + mPos = newBuffer.get() + usedBytes; + } + mEnd = newBuffer.get() + newSize; + mBuffer = std::move(newBuffer); + + MOZ_RELEASE_ASSERT(mPos >= mBuffer.get() && mPos <= mEnd); + return true; + } + + protected: + UniquePtr<uint8_t[]> mBuffer; + uint8_t* mPos; + uint8_t* mEnd; + size_t mUniformSize; + size_t mNumItems; + size_t mMaxSize; + bool mReversed; +}; + +class ConstantStagingBuffer : public StagingBuffer<16> { + public: + explicit ConstantStagingBuffer(MLGDevice* aDevice); +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_StagingBuffer_h diff --git a/gfx/layers/mlgpu/TextureSourceProviderMLGPU.cpp b/gfx/layers/mlgpu/TextureSourceProviderMLGPU.cpp new file mode 100644 index 0000000000..7ddf475588 --- /dev/null +++ b/gfx/layers/mlgpu/TextureSourceProviderMLGPU.cpp @@ -0,0 +1,96 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "TextureSourceProviderMLGPU.h" +#include "LayerManagerMLGPU.h" +#include "MLGDevice.h" +#ifdef XP_WIN +# include "mozilla/layers/MLGDeviceD3D11.h" +#endif + +namespace mozilla { +namespace layers { + +TextureSourceProviderMLGPU::TextureSourceProviderMLGPU( + LayerManagerMLGPU* aLayerManager, MLGDevice* aDevice) + : mLayerManager(aLayerManager), mDevice(aDevice) {} + +TextureSourceProviderMLGPU::~TextureSourceProviderMLGPU() = default; + +int32_t TextureSourceProviderMLGPU::GetMaxTextureSize() const { + if (!mDevice) { + return 0; + } + return mDevice->GetMaxTextureSize(); +} + +bool TextureSourceProviderMLGPU::SupportsEffect(EffectTypes aEffect) { + switch (aEffect) { + case EffectTypes::YCBCR: + return true; + default: + MOZ_ASSERT_UNREACHABLE("NYI"); + } + return false; +} + +bool TextureSourceProviderMLGPU::IsValid() const { return !!mLayerManager; } + +void TextureSourceProviderMLGPU::Destroy() { + mLayerManager = nullptr; + mDevice = nullptr; + TextureSourceProvider::Destroy(); +} + +#ifdef XP_WIN +ID3D11Device* TextureSourceProviderMLGPU::GetD3D11Device() const { + if (!mDevice) { + return nullptr; + } + return mDevice->AsD3D11()->GetD3D11Device(); +} +#endif + +TimeStamp TextureSourceProviderMLGPU::GetLastCompositionEndTime() const { + if (!mLayerManager) { + return TimeStamp(); + } + return mLayerManager->GetLastCompositionEndTime(); +} + +already_AddRefed<DataTextureSource> +TextureSourceProviderMLGPU::CreateDataTextureSource(TextureFlags aFlags) { + RefPtr<DataTextureSource> texture = mDevice->CreateDataTextureSource(aFlags); + return texture.forget(); +} + +already_AddRefed<DataTextureSource> +TextureSourceProviderMLGPU::CreateDataTextureSourceAround( + gfx::DataSourceSurface* aSurface) { + MOZ_ASSERT_UNREACHABLE("NYI"); + return nullptr; +} + +void TextureSourceProviderMLGPU::UnlockAfterComposition(TextureHost* aTexture) { + TextureSourceProvider::UnlockAfterComposition(aTexture); + + // If this is being called after we shutdown the compositor, we must finish + // read unlocking now to prevent a cycle. + if (!IsValid()) { + ReadUnlockTextures(); + } +} + +bool TextureSourceProviderMLGPU::NotifyNotUsedAfterComposition( + TextureHost* aTextureHost) { + if (!IsValid()) { + return false; + } + return TextureSourceProvider::NotifyNotUsedAfterComposition(aTextureHost); +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/TextureSourceProviderMLGPU.h b/gfx/layers/mlgpu/TextureSourceProviderMLGPU.h new file mode 100644 index 0000000000..4210206c72 --- /dev/null +++ b/gfx/layers/mlgpu/TextureSourceProviderMLGPU.h @@ -0,0 +1,56 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef mozilla_gfx_layers_mlgpu_TextureSourceProviderMLGPU_h +#define mozilla_gfx_layers_mlgpu_TextureSourceProviderMLGPU_h + +#include "mozilla/layers/TextureSourceProvider.h" + +namespace mozilla { +namespace layers { + +class MLGDevice; +class LayerManagerMLGPU; + +class TextureSourceProviderMLGPU final : public TextureSourceProvider { + public: + TextureSourceProviderMLGPU(LayerManagerMLGPU* aLayerManager, + MLGDevice* aDevice); + virtual ~TextureSourceProviderMLGPU(); + + already_AddRefed<DataTextureSource> CreateDataTextureSource( + TextureFlags aFlags) override; + + already_AddRefed<DataTextureSource> CreateDataTextureSourceAround( + gfx::DataSourceSurface* aSurface) override; + + void UnlockAfterComposition(TextureHost* aTexture) override; + bool NotifyNotUsedAfterComposition(TextureHost* aTextureHost) override; + + int32_t GetMaxTextureSize() const override; + TimeStamp GetLastCompositionEndTime() const override; + bool SupportsEffect(EffectTypes aEffect) override; + bool IsValid() const override; + +#ifdef XP_WIN + ID3D11Device* GetD3D11Device() const override; +#endif + + void ReadUnlockTextures() { TextureSourceProvider::ReadUnlockTextures(); } + + // Release references to the layer manager. + void Destroy() override; + + private: + // Using RefPtr<> here would be a circular reference. + LayerManagerMLGPU* mLayerManager; + RefPtr<MLGDevice> mDevice; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_TextureSourceProviderMLGPU_h diff --git a/gfx/layers/mlgpu/TexturedLayerMLGPU.cpp b/gfx/layers/mlgpu/TexturedLayerMLGPU.cpp new file mode 100644 index 0000000000..0cb8ccfc16 --- /dev/null +++ b/gfx/layers/mlgpu/TexturedLayerMLGPU.cpp @@ -0,0 +1,196 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "TexturedLayerMLGPU.h" +#include "LayerManagerMLGPU.h" +#include "RenderViewMLGPU.h" +#include "FrameBuilder.h" +#include "mozilla/gfx/Types.h" +#include "mozilla/layers/ImageHost.h" +#include "UnitTransforms.h" + +namespace mozilla { +namespace layers { + +using namespace gfx; + +TexturedLayerMLGPU::TexturedLayerMLGPU(LayerManagerMLGPU* aManager) + : LayerMLGPU(aManager) {} + +TexturedLayerMLGPU::~TexturedLayerMLGPU() { + // Note: we have to cleanup resources in derived classes, since we can't + // easily tell in our destructor if we have a TempImageLayerMLGPU, which + // should not have its compositable detached, and we can't call GetLayer + // here. +} + +bool TexturedLayerMLGPU::SetCompositableHost(CompositableHost* aHost) { + switch (aHost->GetType()) { + case CompositableType::IMAGE: + mHost = aHost->AsImageHost(); + return true; + default: + return false; + } +} + +CompositableHost* TexturedLayerMLGPU::GetCompositableHost() { + if (mHost && mHost->IsAttached()) { + return mHost.get(); + } + return nullptr; +} + +RefPtr<TextureSource> TexturedLayerMLGPU::BindAndGetTexture() { + if (!mHost) { + return nullptr; + } + + LayerManagerMLGPU* lm = GetLayerManager()->AsLayerManagerMLGPU(); + + // Note: we don't call FinishRendering since mask layers do not need + // composite notifications or bias updates. (This function should + // not be called for non-mask-layers). 
+ ImageHost::RenderInfo info; + if (!mHost->PrepareToRender(lm->GetTextureSourceProvider(), &info)) { + return nullptr; + } + + RefPtr<TextureSource> source = mHost->AcquireTextureSource(info); + if (!source) { + return nullptr; + } + + mTexture = source; + return source; +} + +bool TexturedLayerMLGPU::OnPrepareToRender(FrameBuilder* aBuilder) { + if (!mHost) { + return false; + } + + LayerManagerMLGPU* lm = GetLayerManager()->AsLayerManagerMLGPU(); + + ImageHost::RenderInfo info; + if (!mHost->PrepareToRender(lm->GetTextureSourceProvider(), &info)) { + return false; + } + + RefPtr<TextureSource> source = mHost->AcquireTextureSource(info); + if (!source) { + return false; + } + + if (source->AsBigImageIterator()) { + mBigImageTexture = source; + mTexture = nullptr; + } else { + mTexture = source; + } + + mPictureRect = IntRect(0, 0, info.img->mPictureRect.Width(), + info.img->mPictureRect.Height()); + + mHost->FinishRendering(info); + return true; +} + +void TexturedLayerMLGPU::AssignToView(FrameBuilder* aBuilder, + RenderViewMLGPU* aView, + Maybe<Polygon>&& aGeometry) { + if (mBigImageTexture) { + BigImageIterator* iter = mBigImageTexture->AsBigImageIterator(); + iter->BeginBigImageIteration(); + AssignBigImage(aBuilder, aView, iter, aGeometry); + iter->EndBigImageIteration(); + } else { + LayerMLGPU::AssignToView(aBuilder, aView, std::move(aGeometry)); + } +} + +void TexturedLayerMLGPU::AssignBigImage(FrameBuilder* aBuilder, + RenderViewMLGPU* aView, + BigImageIterator* aIter, + const Maybe<Polygon>& aGeometry) { + const Matrix4x4& transform = GetLayer()->GetEffectiveTransformForBuffer(); + + // Note that we don't need to assign these in any particular order, since + // they do not overlap. + do { + IntRect tileRect = aIter->GetTileRect(); + IntRect rect = tileRect.Intersect(mPictureRect); + if (rect.IsEmpty()) { + continue; + } + + { + Rect screenRect = transform.TransformBounds(Rect(rect)); + screenRect.MoveBy(-aView->GetTargetOffset()); + screenRect = + screenRect.Intersect(Rect(mComputedClipRect.ToUnknownRect())); + if (screenRect.IsEmpty()) { + // This tile is not in the clip region, so skip it. + continue; + } + } + + RefPtr<TextureSource> tile = mBigImageTexture->ExtractCurrentTile(); + if (!tile) { + continue; + } + + // Create a temporary item. + RefPtr<TempImageLayerMLGPU> item = + new TempImageLayerMLGPU(aBuilder->GetManager()); + item->Init(this, tile, rect); + + Maybe<Polygon> geometry = aGeometry; + item->AddBoundsToView(aBuilder, aView, std::move(geometry)); + + // Since the layer tree is not holding this alive, we have to ask the + // FrameBuilder to do it for us. + aBuilder->RetainTemporaryLayer(item); + } while (aIter->NextTile()); +} + +TempImageLayerMLGPU::TempImageLayerMLGPU(LayerManagerMLGPU* aManager) + : ImageLayer(aManager, static_cast<HostLayer*>(this)), + TexturedLayerMLGPU(aManager), + mFilter(gfx::SamplingFilter::GOOD), + mIsOpaque(false) {} + +TempImageLayerMLGPU::~TempImageLayerMLGPU() = default; + +void TempImageLayerMLGPU::Init(TexturedLayerMLGPU* aSource, + const RefPtr<TextureSource>& aTexture, + const gfx::IntRect& aPictureRect) { + // ImageLayer properties. + mEffectiveTransform = aSource->GetLayer()->GetEffectiveTransform(); + mEffectiveTransformForBuffer = + aSource->GetLayer()->GetEffectiveTransformForBuffer(); + + // Base LayerMLGPU properties. + mComputedClipRect = aSource->GetComputedClipRect(); + mMask = aSource->GetMask(); + mComputedOpacity = aSource->GetComputedOpacity(); + + // TexturedLayerMLGPU properties. 
+ mHost = aSource->GetImageHost(); + mTexture = aTexture; + mPictureRect = aPictureRect; + + // Local properties. + mFilter = aSource->GetSamplingFilter(); + mShadowVisibleRegion = aSource->GetShadowVisibleRegion(); + mIsOpaque = aSource->IsContentOpaque(); + + // Set this layer to prepared so IsPrepared() assertions don't fire. + MarkPrepared(); +} + +} // namespace layers +} // namespace mozilla diff --git a/gfx/layers/mlgpu/TexturedLayerMLGPU.h b/gfx/layers/mlgpu/TexturedLayerMLGPU.h new file mode 100644 index 0000000000..e6b4e69587 --- /dev/null +++ b/gfx/layers/mlgpu/TexturedLayerMLGPU.h @@ -0,0 +1,90 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_TexturedLayerMLGPU_h +#define mozilla_gfx_layers_mlgpu_TexturedLayerMLGPU_h + +#include "LayerMLGPU.h" +#include "ImageLayers.h" +#include "mozilla/layers/ImageHost.h" + +namespace mozilla { +namespace layers { + +// This is the base class for canvas and image layers. +class TexturedLayerMLGPU : public LayerMLGPU { + public: + TexturedLayerMLGPU* AsTexturedLayerMLGPU() override { return this; } + + virtual gfx::SamplingFilter GetSamplingFilter() = 0; + + bool SetCompositableHost(CompositableHost* aHost) override; + CompositableHost* GetCompositableHost() override; + + void AssignToView(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + Maybe<gfx::Polygon>&& aGeometry) override; + + TextureSource* GetTexture() const { return mTexture; } + ImageHost* GetImageHost() const { return mHost; } + + // Return the scale factor from the texture source to the picture rect. + virtual Maybe<gfx::Size> GetPictureScale() const { return Nothing(); } + + // Mask layers aren't prepared like normal layers. They are bound as + // mask operations are built. Mask layers are never tiled (they are + // scaled to a lower resolution if too big), so this pathway returns + // a TextureSource. + RefPtr<TextureSource> BindAndGetTexture(); + + protected: + explicit TexturedLayerMLGPU(LayerManagerMLGPU* aManager); + virtual ~TexturedLayerMLGPU(); + + void AssignBigImage(FrameBuilder* aBuilder, RenderViewMLGPU* aView, + BigImageIterator* aIter, + const Maybe<gfx::Polygon>& aGeometry); + + bool OnPrepareToRender(FrameBuilder* aBuilder) override; + + protected: + RefPtr<ImageHost> mHost; + RefPtr<TextureSource> mTexture; + RefPtr<TextureSource> mBigImageTexture; + gfx::IntRect mPictureRect; +}; + +// This is a pseudo layer that wraps a tile in an ImageLayer backed by a +// BigImage. Without this, we wouldn't have anything sensible to add to +// RenderPasses. In the future we could potentially consume the source +// layer more intelligently instead (for example, having it compute +// which textures are relevant for a given tile). 
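As a standalone illustration of the per-tile culling that AssignBigImage performs before creating one of these wrapper layers (the Rect type and CountVisibleTiles are simplified stand-ins, and the screen-space transform step is omitted): tiles outside the picture rect or the clip never produce a TempImageLayerMLGPU.

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

struct Rect {
  int x, y, w, h;
  bool IsEmpty() const { return w <= 0 || h <= 0; }
  Rect Intersect(const Rect& aOther) const {
    int x0 = std::max(x, aOther.x), y0 = std::max(y, aOther.y);
    int x1 = std::min(x + w, aOther.x + aOther.w);
    int y1 = std::min(y + h, aOther.y + aOther.h);
    return Rect{x0, y0, x1 - x0, y1 - y0};
  }
};

size_t CountVisibleTiles(const std::vector<Rect>& aTiles, const Rect& aPicture,
                         const Rect& aClip) {
  size_t visible = 0;
  for (const Rect& tile : aTiles) {
    Rect rect = tile.Intersect(aPicture);           // clip to the decoded picture
    if (rect.IsEmpty()) continue;
    if (rect.Intersect(aClip).IsEmpty()) continue;  // clip to the layer's clip rect
    visible++;  // the real code wraps this tile in a temporary layer here
  }
  return visible;
}

int main() {
  std::vector<Rect> tiles = {{0, 0, 256, 256}, {256, 0, 256, 256}};
  Rect picture{0, 0, 300, 200};
  Rect clip{0, 0, 256, 200};
  assert(CountVisibleTiles(tiles, picture, clip) == 1);  // the second tile is clipped out
  return 0;
}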
+class TempImageLayerMLGPU final : public ImageLayer, public TexturedLayerMLGPU { + public: + explicit TempImageLayerMLGPU(LayerManagerMLGPU* aManager); + + // Layer + HostLayer* AsHostLayer() override { return this; } + gfx::SamplingFilter GetSamplingFilter() override { return mFilter; } + bool IsContentOpaque() override { return mIsOpaque; } + + void Init(TexturedLayerMLGPU* aSource, const RefPtr<TextureSource>& aTexture, + const gfx::IntRect& aPictureRect); + + // HostLayer + Layer* GetLayer() override { return this; } + + protected: + virtual ~TempImageLayerMLGPU(); + + private: + gfx::SamplingFilter mFilter; + bool mIsOpaque; +}; + +} // namespace layers +} // namespace mozilla + +#endif // mozilla_gfx_layers_mlgpu_TexturedLayerMLGPU_h diff --git a/gfx/layers/mlgpu/UtilityMLGPU.h b/gfx/layers/mlgpu/UtilityMLGPU.h new file mode 100644 index 0000000000..2432bf88b6 --- /dev/null +++ b/gfx/layers/mlgpu/UtilityMLGPU.h @@ -0,0 +1,45 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef mozilla_gfx_layers_mlgpu_UtilityMLGPU_h +#define mozilla_gfx_layers_mlgpu_UtilityMLGPU_h + +#include "mozilla/Assertions.h" +#include "mozilla/MathAlgorithms.h" + +namespace mozilla { +namespace layers { + +template <size_t T> +struct AlignUp { + static inline size_t calc(size_t aAmount) { + MOZ_ASSERT(IsPowerOfTwo(T), "alignment must be a power of two"); + return aAmount + ((T - (aAmount % T)) % T); + } +}; + +template <> +struct AlignUp<0> { + static inline size_t calc(size_t aAmount) { return aAmount; } +}; + +} // namespace layers +} // namespace mozilla + +#ifdef ENABLE_AL_LOGGING +# define AL_LOG(...) printf_stderr("AL: " __VA_ARGS__) +# define AL_LOG_IF(cond, ...) \ + do { \ + if (cond) { \ + printf_stderr("AL: " __VA_ARGS__); \ + } \ + } while (0) +#else +# define AL_LOG(...) +# define AL_LOG_IF(...) +#endif + +#endif // mozilla_gfx_layers_mlgpu_UtilityMLGPU_h |